Loading Data set¶
In [2]:
!pip install openpyxl
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import nltk
nltk.download('punkt')
nltk.download('stopwords')
# Load the Excel file
xls = pd.ExcelFile('https://github.com/saifrahmania/Data36118/raw/refs/heads/main/Assignment1/Data/ASCDataset/Australian%20Skills%20Classification%20-%20December%202023.xlsx')
# Dictionary to hold all DataFrames, one for each sheet
sheets_dict = {}
for sheet_name in xls.sheet_names:
# Load each sheet into a DataFrame
sheets_dict[sheet_name] = pd.read_excel(xls, sheet_name=sheet_name)
xls.sheet_names
Collecting openpyxl Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB) Collecting et-xmlfile (from openpyxl) Downloading et_xmlfile-2.0.0-py3-none-any.whl.metadata (2.7 kB) Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 250.9/250.9 kB 4.8 MB/s eta 0:00:00 Downloading et_xmlfile-2.0.0-py3-none-any.whl (18 kB) Installing collected packages: et-xmlfile, openpyxl Successfully installed et-xmlfile-2.0.0 openpyxl-3.1.5
[nltk_data] Downloading package punkt to /root/nltk_data... [nltk_data] Package punkt is already up-to-date! [nltk_data] Downloading package stopwords to /root/nltk_data... [nltk_data] Package stopwords is already up-to-date!
Out[2]:
['Index', 'Glossary', 'Occupation descriptions', 'Core competency descriptions', 'Specialist tasks hierarchy', 'Tech tools heirarchy', 'Core competencies', 'Specialist tasks data', 'Technology tools', 'Appendix - tech tool examples', 'Appendix - common tech tools']
Data Inspection¶
In [4]:
for sheet_name in xls.sheet_names:
print(f"Columns in sheet '{sheet_name}':")
print(sheets_dict[sheet_name].columns.tolist())
print("-" * 20)
Columns in sheet 'Index': ['Unnamed: 0', 'Unnamed: 1'] -------------------- Columns in sheet 'Glossary': ['Glossary of key terms', 'Unnamed: 1'] -------------------- Columns in sheet 'Occupation descriptions': ['Occupation Type', 'ANZSCO Code', 'Sub-Profile Code', 'ANZSCO Title', 'ANZSCO Description'] -------------------- Columns in sheet 'Core competency descriptions': ['Core Competency', 'Core Competency Description', 'Score', 'Proficiency Level', 'Anchor Value'] -------------------- Columns in sheet 'Specialist tasks hierarchy': ['Specialist Task', 'Specialist Cluster', 'Cluster Family', 'Skill Statement'] -------------------- Columns in sheet 'Tech tools heirarchy': ['Technology Tool Category', 'Technology Tool Category Description', 'Technology Tool', 'Technology Tool Description', 'Technology Tool Extended Description'] -------------------- Columns in sheet 'Core competencies': ['Occupation Type', 'ANZSCO Code', 'Sub-Profile Code', 'ANZSCO Title', 'Core Competency', 'Score', 'Proficiency Level', 'Anchor Value'] -------------------- Columns in sheet 'Specialist tasks data': ['Occupation Type', 'ANZSCO Code', 'Sub-Profile Code', 'ANZSCO Title', 'Specialist Task', '% of time spent on task', 'Emerging/\nTrending Flag', 'Specialist Cluster', ' % of time spent on cluster', 'Cluster Family', '% of time spent on family', 'Skills Statement'] -------------------- Columns in sheet 'Technology tools': ['Occupation Type', 'ANZSCO Code', 'Sub-Profile Code', 'ANZSCO Title', 'Technology Tool', 'Emerging/Trending Flag'] -------------------- Columns in sheet 'Appendix - tech tool examples': ['Technology Tool', 'Technology Tool Example'] -------------------- Columns in sheet 'Appendix - common tech tools': ['Common Technology Tools'] --------------------
Dividing Sheets¶
In [5]:
sheets_dict = {}
for sheet_name in xls.sheet_names:
# Load each sheet into a DataFrame
sheets_dict[sheet_name] = pd.read_excel(xls, sheet_name=sheet_name)
# Accessing specific sheets and their data
index_df = sheets_dict['Index']
glossary_df = sheets_dict['Glossary']
occupation_descriptions_df = sheets_dict['Occupation descriptions']
core_competency_descriptions_df = sheets_dict['Core competency descriptions']
specialist_tasks_hierarchy_df = sheets_dict['Specialist tasks hierarchy']
tech_tools_hierarchy_df = sheets_dict['Tech tools heirarchy']
core_competencies_df = sheets_dict['Core competencies']
specialist_tasks_data_df = sheets_dict['Specialist tasks data']
technology_tools_df = sheets_dict['Technology tools']
appendix_tech_tool_examples_df = sheets_dict['Appendix - tech tool examples']
appendix_common_tech_tools_df = sheets_dict['Appendix - common tech tools']
# Now you can work with each DataFrame individually
# Example: Print the first 5 rows of the 'Occupation descriptions' sheet
index_df.head()
Out[5]:
| Unnamed: 0 | Unnamed: 1 | |
|---|---|---|
| 0 | Australian Skills Classification | NaN |
| 1 | Version 3.0 - current at December 2023 - updat... | NaN |
| 2 | Index | NaN |
| 3 | Glossary | Explanation of key terms used in this dataset. |
| 4 | Occupation descriptions | ANZSCO Occupation codes and descriptions.\nSub... |
In [6]:
specialist_tasks_data_df.columns.tolist()
Out[6]:
['Occupation Type', 'ANZSCO Code', 'Sub-Profile Code', 'ANZSCO Title', 'Specialist Task', '% of time spent on task', 'Emerging/\nTrending Flag', 'Specialist Cluster', ' % of time spent on cluster', 'Cluster Family', '% of time spent on family', 'Skills Statement']
In [7]:
print(core_competencies_df.columns.tolist())
['Occupation Type', 'ANZSCO Code', 'Sub-Profile Code', 'ANZSCO Title', 'Core Competency', 'Score', 'Proficiency Level', 'Anchor Value']
In [8]:
try:
core_competencies_df = core_competencies_df.drop(columns=['Sub-Profile Code'])
except KeyError:
print("Column 'Sub-Profile Code' not found in 'core_competencies_df'")
try:
specialist_tasks_data_df = specialist_tasks_data_df.drop(columns=['Sub-Profile Code'])
except KeyError:
print("Column 'Sub-Profile Code' not found in 'specialist_tasks_data_df'")
In [9]:
core_competencies_df.shape
Out[9]:
(11030, 7)
Start merging Sheets¶
In [10]:
import pandas as pd
# Assuming specialist_tasks_data_df and core_competencies_df are loaded DataFrames
# First, let's ensure there are no duplicates within core_competencies_df that could cause multiple matches
core_competencies_df = core_competencies_df.drop_duplicates(subset=['Occupation Type', 'ANZSCO Title'], keep='first')
# Perform a left join with core_competencies_df to append matching data
merged_df = pd.merge(specialist_tasks_data_df, core_competencies_df,
on=['Occupation Type', 'ANZSCO Title'],
how='left')
# Check and print the number of rows and structure to ensure it matches expectations
print("After merge, DataFrame size: ", merged_df.shape)
print(merged_df.head())
# Optionally, check for any rows that might still have missing data indicating no match was found
unmatched_indicator = merged_df.isna().any(axis=1)
print("Number of unmatched rows: ", unmatched_indicator.sum())
# This approach ensures we do not inadvertently increase the number of rows in specialist_tasks_data_df.
After merge, DataFrame size: (30450, 16)
Occupation Type ANZSCO Code_x ANZSCO Title \
0 ANZSCO 4 1111 Chief Executives and Managing Directors
1 ANZSCO 4 1111 Chief Executives and Managing Directors
2 ANZSCO 4 1111 Chief Executives and Managing Directors
3 ANZSCO 4 1111 Chief Executives and Managing Directors
4 ANZSCO 4 1111 Chief Executives and Managing Directors
Specialist Task % of time spent on task \
0 Direct or manage financial activities or opera... 0.1302
1 Direct department or organisational activities 0.1117
2 Direct sales, marketing or customer service ac... 0.0808
3 Communicate with others to arrange, coordinate... 0.0665
4 Analyse data to assess operational or project ... 0.0651
Emerging/\nTrending Flag Specialist Cluster \
0 NaN Manage, monitor and undertake financial activi...
1 Trending Manage services, staff or activities
2 NaN Manage services, staff or activities
3 NaN Communicate or collaborate with others
4 NaN Use data to inform operational decisions
% of time spent on cluster Cluster Family \
0 0.1644 Business operations and financial activities
1 0.2128 Business operations and financial activities
2 0.2128 Business operations and financial activities
3 0.0750 Communication and collaboration
4 0.1009 Data, analytics, and databases
% of time spent on family \
0 0.5322
1 0.5322
2 0.5322
3 0.0890
4 0.1370
Skills Statement ANZSCO Code_y \
0 Direct and oversee the financial operations of... 1111.0
1 Direct and oversee the activities of a work un... 1111.0
2 Direct and oversee the sales, marketing, or cu... 1111.0
3 Coordinate with others in order to plan, organ... 1111.0
4 Analyse qualitative and quantitative data aris... 1111.0
Core Competency Score Proficiency Level \
0 Digital engagement 6.0 Intermediate
1 Digital engagement 6.0 Intermediate
2 Digital engagement 6.0 Intermediate
3 Digital engagement 6.0 Intermediate
4 Digital engagement 6.0 Intermediate
Anchor Value
0 Use software on a portable device to document ...
1 Use software on a portable device to document ...
2 Use software on a portable device to document ...
3 Use software on a portable device to document ...
4 Use software on a portable device to document ...
Number of unmatched rows: 29048
In [11]:
merged_df.shape
Out[11]:
(30450, 16)
In [12]:
merged_df.columns.tolist()
Out[12]:
['Occupation Type', 'ANZSCO Code_x', 'ANZSCO Title', 'Specialist Task', '% of time spent on task', 'Emerging/\nTrending Flag', 'Specialist Cluster', ' % of time spent on cluster', 'Cluster Family', '% of time spent on family', 'Skills Statement', 'ANZSCO Code_y', 'Core Competency', 'Score', 'Proficiency Level', 'Anchor Value']
In [13]:
try:
merged_df = merged_df.drop(columns=['% of time spent on task', 'Emerging/\nTrending Flag', ' % of time spent on cluster', '% of time spent on family', 'ANZSCO Code_y'])
except KeyError as e:
print(f"Column not found: {e}")
In [14]:
merged_df.shape
Out[14]:
(30450, 11)
In [15]:
# Count empty rows in each column of merged_df
empty_rows_per_column = merged_df.isnull().sum()
# Print the column names and the number of empty rows for each
for column, empty_count in empty_rows_per_column.items():
print(f"Column '{column}': {empty_count} empty rows")
Column 'Occupation Type': 0 empty rows Column 'ANZSCO Code_x': 0 empty rows Column 'ANZSCO Title': 0 empty rows Column 'Specialist Task': 0 empty rows Column 'Specialist Cluster': 0 empty rows Column 'Cluster Family': 0 empty rows Column 'Skills Statement': 0 empty rows Column 'Core Competency': 6568 empty rows Column 'Score': 6568 empty rows Column 'Proficiency Level': 6568 empty rows Column 'Anchor Value': 6568 empty rows
In [16]:
# Drop rows where 'Core Competency', 'Score', and 'Anchor Value' are all null
merged_df = merged_df.dropna(subset=['Core Competency', 'Score', 'Anchor Value'], how='all')
In [17]:
merged_df.shape
Out[17]:
(23882, 11)
In [18]:
merged_df.head()
Out[18]:
| Occupation Type | ANZSCO Code_x | ANZSCO Title | Specialist Task | Specialist Cluster | Cluster Family | Skills Statement | Core Competency | Score | Proficiency Level | Anchor Value | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | ANZSCO 4 | 1111 | Chief Executives and Managing Directors | Direct or manage financial activities or opera... | Manage, monitor and undertake financial activi... | Business operations and financial activities | Direct and oversee the financial operations of... | Digital engagement | 6.0 | Intermediate | Use software on a portable device to document ... |
| 1 | ANZSCO 4 | 1111 | Chief Executives and Managing Directors | Direct department or organisational activities | Manage services, staff or activities | Business operations and financial activities | Direct and oversee the activities of a work un... | Digital engagement | 6.0 | Intermediate | Use software on a portable device to document ... |
| 2 | ANZSCO 4 | 1111 | Chief Executives and Managing Directors | Direct sales, marketing or customer service ac... | Manage services, staff or activities | Business operations and financial activities | Direct and oversee the sales, marketing, or cu... | Digital engagement | 6.0 | Intermediate | Use software on a portable device to document ... |
| 3 | ANZSCO 4 | 1111 | Chief Executives and Managing Directors | Communicate with others to arrange, coordinate... | Communicate or collaborate with others | Communication and collaboration | Coordinate with others in order to plan, organ... | Digital engagement | 6.0 | Intermediate | Use software on a portable device to document ... |
| 4 | ANZSCO 4 | 1111 | Chief Executives and Managing Directors | Analyse data to assess operational or project ... | Use data to inform operational decisions | Data, analytics, and databases | Analyse qualitative and quantitative data aris... | Digital engagement | 6.0 | Intermediate | Use software on a portable device to document ... |
In [19]:
merged_df.columns.tolist()
merged_df.shape
Out[19]:
(23882, 11)
In [20]:
try:
merged_df = merged_df.rename(columns={'ANZSCO Code_x': 'ANZSCO Code'})
except KeyError as e:
print(f"Column not found: {e}")
try:
merged_df = merged_df.drop(columns=['Technology Tool_x', 'Technology Tool_y'])
except KeyError as e:
print(f"Column not found: {e}")
Column not found: "['Technology Tool_x', 'Technology Tool_y'] not found in axis"
In [21]:
merged_df.columns.tolist()
occupation_descriptions_df.columns.tolist()
Out[21]:
['Occupation Type', 'ANZSCO Code', 'Sub-Profile Code', 'ANZSCO Title', 'ANZSCO Description']
In [22]:
both_empty = merged_df[merged_df['Score'].isnull() & merged_df['Proficiency Level'].isnull()].shape[0]
print(f"Number of rows with both 'Score' and 'Proficiency Level' empty: {both_empty}")
# Count rows where 'Score' is empty/null/NaN but 'Proficiency Level' has a value
score_empty_proficiency_not = merged_df[merged_df['Score'].isnull() & merged_df['Proficiency Level'].notnull()].shape[0]
print(f"Number of rows with 'Score' empty but 'Proficiency Level' not empty: {score_empty_proficiency_not}")
# Count rows where 'Score' has a value but 'Proficiency Level' is empty/null/NaN
score_not_empty_proficiency_empty = merged_df[merged_df['Score'].notnull() & merged_df['Proficiency Level'].isnull()].shape[0]
print(f"Number of rows with 'Score' not empty but 'Proficiency Level' empty: {score_not_empty_proficiency_empty}")
# Count rows where either 'Score' or 'Proficiency Level' is empty/null/NaN
either_empty = merged_df[merged_df['Score'].isnull() | merged_df['Proficiency Level'].isnull()].shape[0]
print(f"Number of rows with either 'Score' or 'Proficiency Level' empty: {either_empty}")
Number of rows with both 'Score' and 'Proficiency Level' empty: 0 Number of rows with 'Score' empty but 'Proficiency Level' not empty: 0 Number of rows with 'Score' not empty but 'Proficiency Level' empty: 0 Number of rows with either 'Score' or 'Proficiency Level' empty: 0
In [23]:
# Calculate the mean of '% of time spent on task', '% of time spent on family', and 'Score', ignoring NaN values
mean_score = merged_df['Score'].mean()
merged_df['Score'].fillna(mean_score, inplace=True)
<ipython-input-23-02c996b9c362>:6: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.
For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.
merged_df['Score'].fillna(mean_score, inplace=True)
In [24]:
empty_rows_per_column = merged_df.isnull().sum()
# Print the column names and the number of empty rows for each
for column, empty_count in empty_rows_per_column.items():
print(f"Column '{column}': {empty_count} empty rows")
Column 'Occupation Type': 0 empty rows Column 'ANZSCO Code': 0 empty rows Column 'ANZSCO Title': 0 empty rows Column 'Specialist Task': 0 empty rows Column 'Specialist Cluster': 0 empty rows Column 'Cluster Family': 0 empty rows Column 'Skills Statement': 0 empty rows Column 'Core Competency': 0 empty rows Column 'Score': 0 empty rows Column 'Proficiency Level': 0 empty rows Column 'Anchor Value': 0 empty rows
In [25]:
# Drop rows where 'Core Competency', 'Anchor Value', and 'Core Competency' are null/NaN
merged_df = merged_df.dropna(subset=['Core Competency', 'Anchor Value'], how='any')
In [26]:
merged_df.shape
Out[26]:
(23882, 11)
In [27]:
merged_df.head()
Out[27]:
| Occupation Type | ANZSCO Code | ANZSCO Title | Specialist Task | Specialist Cluster | Cluster Family | Skills Statement | Core Competency | Score | Proficiency Level | Anchor Value | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | ANZSCO 4 | 1111 | Chief Executives and Managing Directors | Direct or manage financial activities or opera... | Manage, monitor and undertake financial activi... | Business operations and financial activities | Direct and oversee the financial operations of... | Digital engagement | 6.0 | Intermediate | Use software on a portable device to document ... |
| 1 | ANZSCO 4 | 1111 | Chief Executives and Managing Directors | Direct department or organisational activities | Manage services, staff or activities | Business operations and financial activities | Direct and oversee the activities of a work un... | Digital engagement | 6.0 | Intermediate | Use software on a portable device to document ... |
| 2 | ANZSCO 4 | 1111 | Chief Executives and Managing Directors | Direct sales, marketing or customer service ac... | Manage services, staff or activities | Business operations and financial activities | Direct and oversee the sales, marketing, or cu... | Digital engagement | 6.0 | Intermediate | Use software on a portable device to document ... |
| 3 | ANZSCO 4 | 1111 | Chief Executives and Managing Directors | Communicate with others to arrange, coordinate... | Communicate or collaborate with others | Communication and collaboration | Coordinate with others in order to plan, organ... | Digital engagement | 6.0 | Intermediate | Use software on a portable device to document ... |
| 4 | ANZSCO 4 | 1111 | Chief Executives and Managing Directors | Analyse data to assess operational or project ... | Use data to inform operational decisions | Data, analytics, and databases | Analyse qualitative and quantitative data aris... | Digital engagement | 6.0 | Intermediate | Use software on a portable device to document ... |
In [28]:
empty_rows_per_column = merged_df.isnull().sum()
# Print the column names and the number of empty rows for each
for column, empty_count in empty_rows_per_column.items():
print(f"Column '{column}': {empty_count} empty rows")
Column 'Occupation Type': 0 empty rows Column 'ANZSCO Code': 0 empty rows Column 'ANZSCO Title': 0 empty rows Column 'Specialist Task': 0 empty rows Column 'Specialist Cluster': 0 empty rows Column 'Cluster Family': 0 empty rows Column 'Skills Statement': 0 empty rows Column 'Core Competency': 0 empty rows Column 'Score': 0 empty rows Column 'Proficiency Level': 0 empty rows Column 'Anchor Value': 0 empty rows
In [29]:
# Drop the specified columns from technology_tools_df
technology_tools_df = technology_tools_df.drop(columns=['Emerging/Trending Flag', 'Sub-Profile Code'], errors='ignore')
# Display the updated DataFrame (optional)
technology_tools_df.head()
Out[29]:
| Occupation Type | ANZSCO Code | ANZSCO Title | Technology Tool | |
|---|---|---|---|---|
| 0 | ANZSCO 4 | 1111 | Chief Executives and Managing Directors | Accounting and financial management systems |
| 1 | ANZSCO 4 | 1111 | Chief Executives and Managing Directors | Audio/video conferencing software |
| 2 | ANZSCO 4 | 1111 | Chief Executives and Managing Directors | Business intelligence and decision support sof... |
| 3 | ANZSCO 4 | 1111 | Chief Executives and Managing Directors | Flow chart and diagram software |
| 4 | ANZSCO 4 | 1111 | Chief Executives and Managing Directors | Human resources software |
In [30]:
technology_tools_df.shape
Out[30]:
(5761, 4)
In [31]:
technology_tools_aggregated = technology_tools_df.groupby(['Occupation Type', 'ANZSCO Title'])['Technology Tool'].apply(', '.join).reset_index()
# Merge final_df with this aggregated dataframe
merged_df = pd.merge(merged_df, technology_tools_aggregated, on=['Occupation Type', 'ANZSCO Title'], how='left')
# Now, merged_df will have an additional column 'Technology Tool' from technology_tools_df
# This column contains concatenated strings of tools, ensuring no increase in row count
# Display the head of the merged dataframe to verify
print(merged_df.head())
Occupation Type ANZSCO Code ANZSCO Title \
0 ANZSCO 4 1111 Chief Executives and Managing Directors
1 ANZSCO 4 1111 Chief Executives and Managing Directors
2 ANZSCO 4 1111 Chief Executives and Managing Directors
3 ANZSCO 4 1111 Chief Executives and Managing Directors
4 ANZSCO 4 1111 Chief Executives and Managing Directors
Specialist Task \
0 Direct or manage financial activities or opera...
1 Direct department or organisational activities
2 Direct sales, marketing or customer service ac...
3 Communicate with others to arrange, coordinate...
4 Analyse data to assess operational or project ...
Specialist Cluster \
0 Manage, monitor and undertake financial activi...
1 Manage services, staff or activities
2 Manage services, staff or activities
3 Communicate or collaborate with others
4 Use data to inform operational decisions
Cluster Family \
0 Business operations and financial activities
1 Business operations and financial activities
2 Business operations and financial activities
3 Communication and collaboration
4 Data, analytics, and databases
Skills Statement Core Competency \
0 Direct and oversee the financial operations of... Digital engagement
1 Direct and oversee the activities of a work un... Digital engagement
2 Direct and oversee the sales, marketing, or cu... Digital engagement
3 Coordinate with others in order to plan, organ... Digital engagement
4 Analyse qualitative and quantitative data aris... Digital engagement
Score Proficiency Level Anchor Value \
0 6.0 Intermediate Use software on a portable device to document ...
1 6.0 Intermediate Use software on a portable device to document ...
2 6.0 Intermediate Use software on a portable device to document ...
3 6.0 Intermediate Use software on a portable device to document ...
4 6.0 Intermediate Use software on a portable device to document ...
Technology Tool
0 Accounting and financial management systems, A...
1 Accounting and financial management systems, A...
2 Accounting and financial management systems, A...
3 Accounting and financial management systems, A...
4 Accounting and financial management systems, A...
In [32]:
for col in merged_df.columns:
empty_count = merged_df[col].isnull().sum()
print(f"Column '{col}': {empty_count} empty values")
Column 'Occupation Type': 0 empty values Column 'ANZSCO Code': 0 empty values Column 'ANZSCO Title': 0 empty values Column 'Specialist Task': 0 empty values Column 'Specialist Cluster': 0 empty values Column 'Cluster Family': 0 empty values Column 'Skills Statement': 0 empty values Column 'Core Competency': 0 empty values Column 'Score': 0 empty values Column 'Proficiency Level': 0 empty values Column 'Anchor Value': 0 empty values Column 'Technology Tool': 2653 empty values
In [33]:
# Identify rows with missing 'Technology Tool' values
missing_tech_tool_rows = merged_df[merged_df['Technology Tool'].isnull()]
# Iterate through missing rows and try to find a match in technology_tools_df
for index, row in missing_tech_tool_rows.iterrows():
# Find matching entries in technology_tools_df
matched_tools = technology_tools_df[
(technology_tools_df['Occupation Type'] == row['Occupation Type']) &
(technology_tools_df['ANZSCO Title'] == row['ANZSCO Title'])
]['Technology Tool'].dropna().unique() # Drop NaN values and get unique tools
# If matches are found, update 'Technology Tool' column in merged_df
if len(matched_tools) > 0:
merged_df.at[index, 'Technology Tool'] = ', '.join(matched_tools)
# Verify if missing values are reduced
empty_count_after_filling = merged_df['Technology Tool'].isnull().sum()
print(f"After filling missing values, empty 'Technology Tool' count: {empty_count_after_filling}")
After filling missing values, empty 'Technology Tool' count: 2653
In [34]:
# Step 1: Standardize column values (strip spaces, lowercase)
merged_df['Occupation Type'] = merged_df['Occupation Type'].str.strip().str.lower()
merged_df['ANZSCO Title'] = merged_df['ANZSCO Title'].str.strip().str.lower()
technology_tools_df['Occupation Type'] = technology_tools_df['Occupation Type'].str.strip().str.lower()
technology_tools_df['ANZSCO Title'] = technology_tools_df['ANZSCO Title'].str.strip().str.lower()
# Step 2: Re-attempt merging
technology_tools_aggregated = technology_tools_df.groupby(['Occupation Type', 'ANZSCO Title'])['Technology Tool']\
.apply(lambda x: ', '.join(x.dropna().unique())).reset_index()
# Step 3: Merge again
merged_df = pd.merge(merged_df, technology_tools_aggregated, on=['Occupation Type', 'ANZSCO Title'], how='left', suffixes=('', '_new'))
# Step 4: Fill missing Technology Tools from re-merged column
merged_df['Technology Tool'] = merged_df['Technology Tool'].fillna(merged_df['Technology Tool_new'])
# Drop helper column after merging
merged_df.drop(columns=['Technology Tool_new'], inplace=True)
# Step 5: Identify remaining missing values
missing_tech_tool_rows = merged_df[merged_df['Technology Tool'].isnull()]
# Step 6: Debug - Check if missing job titles exist in technology_tools_df
missing_titles = missing_tech_tool_rows[['Occupation Type', 'ANZSCO Title']].drop_duplicates()
unmatched_titles = missing_titles.merge(technology_tools_df[['Occupation Type', 'ANZSCO Title']],
on=['Occupation Type', 'ANZSCO Title'],
how='left', indicator=True)
# Print titles that don't exist in technology_tools_df
print("Job titles that are missing in technology_tools_df:")
print(unmatched_titles[unmatched_titles['_merge'] == 'left_only'])
# Step 7: Final Check - Print remaining empty values count
empty_count_after_filling = merged_df['Technology Tool'].isnull().sum()
print(f"\nAfter second attempt, empty 'Technology Tool' count: {empty_count_after_filling}")
Job titles that are missing in technology_tools_df:
Occupation Type ANZSCO Title \
0 anzsco 4 horticultural crop growers
1 anzsco 4 amusement, fitness and sports centre managers
2 anzsco 4 marine transport professionals
3 anzsco 4 other health diagnostic and promotion professi...
4 anzsco 4 welfare, recreation and community arts workers
.. ... ...
137 anzsco 6 vending machine attendant
138 anzsco 6 car park attendant
139 anzsco 6 crossing supervisor
140 anzsco 6 electrical or telecommunications trades assistant
141 anzsco 6 ticket collector or usher
_merge
0 left_only
1 left_only
2 left_only
3 left_only
4 left_only
.. ...
137 left_only
138 left_only
139 left_only
140 left_only
141 left_only
[142 rows x 3 columns]
After second attempt, empty 'Technology Tool' count: 2572
In [35]:
merged_df.head()
Out[35]:
| Occupation Type | ANZSCO Code | ANZSCO Title | Specialist Task | Specialist Cluster | Cluster Family | Skills Statement | Core Competency | Score | Proficiency Level | Anchor Value | Technology Tool | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | anzsco 4 | 1111 | chief executives and managing directors | Direct or manage financial activities or opera... | Manage, monitor and undertake financial activi... | Business operations and financial activities | Direct and oversee the financial operations of... | Digital engagement | 6.0 | Intermediate | Use software on a portable device to document ... | Accounting and financial management systems, A... |
| 1 | anzsco 4 | 1111 | chief executives and managing directors | Direct department or organisational activities | Manage services, staff or activities | Business operations and financial activities | Direct and oversee the activities of a work un... | Digital engagement | 6.0 | Intermediate | Use software on a portable device to document ... | Accounting and financial management systems, A... |
| 2 | anzsco 4 | 1111 | chief executives and managing directors | Direct sales, marketing or customer service ac... | Manage services, staff or activities | Business operations and financial activities | Direct and oversee the sales, marketing, or cu... | Digital engagement | 6.0 | Intermediate | Use software on a portable device to document ... | Accounting and financial management systems, A... |
| 3 | anzsco 4 | 1111 | chief executives and managing directors | Communicate with others to arrange, coordinate... | Communicate or collaborate with others | Communication and collaboration | Coordinate with others in order to plan, organ... | Digital engagement | 6.0 | Intermediate | Use software on a portable device to document ... | Accounting and financial management systems, A... |
| 4 | anzsco 4 | 1111 | chief executives and managing directors | Analyse data to assess operational or project ... | Use data to inform operational decisions | Data, analytics, and databases | Analyse qualitative and quantitative data aris... | Digital engagement | 6.0 | Intermediate | Use software on a portable device to document ... | Accounting and financial management systems, A... |
In [36]:
!pip install fuzzywuzzy
!pip install python-Levenshtein
Collecting fuzzywuzzy Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl.metadata (4.9 kB) Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB) Installing collected packages: fuzzywuzzy Successfully installed fuzzywuzzy-0.18.0 Collecting python-Levenshtein Downloading python_levenshtein-0.27.1-py3-none-any.whl.metadata (3.7 kB) Collecting Levenshtein==0.27.1 (from python-Levenshtein) Downloading levenshtein-0.27.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB) Collecting rapidfuzz<4.0.0,>=3.9.0 (from Levenshtein==0.27.1->python-Levenshtein) Downloading rapidfuzz-3.12.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB) Downloading python_levenshtein-0.27.1-py3-none-any.whl (9.4 kB) Downloading levenshtein-0.27.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (161 kB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 161.7/161.7 kB 3.8 MB/s eta 0:00:00 Downloading rapidfuzz-3.12.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.1/3.1 MB 43.9 MB/s eta 0:00:00 Installing collected packages: rapidfuzz, Levenshtein, python-Levenshtein Successfully installed Levenshtein-0.27.1 python-Levenshtein-0.27.1 rapidfuzz-3.12.2
In [37]:
from fuzzywuzzy import process
# Function to find closest matching job title
def find_closest_job(title, job_list):
match, score = process.extractOne(title, job_list)
return match if score > 80 else None # Return only if similarity score > 80%
# Get unique job titles from technology_tools_df
existing_jobs = technology_tools_df['ANZSCO Title'].unique()
# Iterate through missing job titles and find closest matches
missing_tech_tool_rows = merged_df[merged_df['Technology Tool'].isnull()].copy()
for index, row in missing_tech_tool_rows.iterrows():
closest_match = find_closest_job(row['ANZSCO Title'], existing_jobs)
if closest_match:
# Find corresponding technology tool for the closest matched job title
matched_tools = technology_tools_df[technology_tools_df['ANZSCO Title'] == closest_match]['Technology Tool'].dropna().unique()
if len(matched_tools) > 0:
merged_df.at[index, 'Technology Tool'] = ', '.join(matched_tools)
# Final Check - Count remaining empty values
empty_count_final = merged_df['Technology Tool'].isnull().sum()
print(f"After fuzzy matching, empty 'Technology Tool' count: {empty_count_final}")
After fuzzy matching, empty 'Technology Tool' count: 713
In [38]:
# Assign "No Specific Tools" to remaining missing values
merged_df['Technology Tool'].fillna("No Specific Tools", inplace=True)
# Final check on missing values
empty_count_final = merged_df['Technology Tool'].isnull().sum()
print(f"After assigning default values, empty 'Technology Tool' count: {empty_count_final}")
After assigning default values, empty 'Technology Tool' count: 0
<ipython-input-38-6cf7402ffb4c>:2: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.
For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.
merged_df['Technology Tool'].fillna("No Specific Tools", inplace=True)
In [39]:
# Drop the specified column from occupation_descriptions_df
occupation_descriptions_df = occupation_descriptions_df.drop(columns=['Sub-Profile Code'], errors='ignore')
# Display the updated DataFrame (optional)
occupation_descriptions_df.head()
Out[39]:
| Occupation Type | ANZSCO Code | ANZSCO Title | ANZSCO Description | |
|---|---|---|---|---|
| 0 | ANZSCO 4 | 1111 | Chief Executives and Managing Directors | Chief Executives and Managing Directors determ... |
| 1 | ANZSCO 4 | 1112 | General Managers | General Managers plan, organise, direct, contr... |
| 2 | ANZSCO 4 | 1211 | Aquaculture Farmers | Aquaculture Farmers plan, organise, control, c... |
| 3 | ANZSCO 4 | 1213 | Livestock Farmers | Livestock Farmers plan, organise, control, coo... |
| 4 | ANZSCO 4 | 1215 | Broadacre Crop Growers | Broadacre Crop Growers plan, organise, control... |
In [40]:
occupation_descriptions_df.shape
Out[40]:
(1650, 4)
In [107]:
import pandas as pd
# Assuming merged_df is already loaded and contains the results from previous merging processes
# Perform the merge to add 'ANZSCO Description'
merged_df = pd.merge(merged_df, occupation_descriptions_df[['Occupation Type', 'ANZSCO Title', 'ANZSCO Description']],
on=['Occupation Type', 'ANZSCO Title'],
how='left')
# Now, merged_df will have an additional column 'ANZSCO Description' from occupation_descriptions_df
# This column contains descriptions based on the matched 'Occupation Type' and 'ANZSCO Title'
# Print the first few rows to verify the new structure and content
# Optionally, check the number of rows to ensure they haven't increased
print("Number of rows in the merged DataFrame:", len(merged_df))
merged_df.head()
Number of rows in the merged DataFrame: 23882
Out[107]:
| Occupation Type | ANZSCO Code | ANZSCO Title | Specialist Task | Specialist Cluster | Cluster Family | Skills Statement | Core Competency | Score | Proficiency Level | ... | bigrams | trigrams | description_length | ner | lemmatized | pos_tags | sentiment | sentiment_polarity | ANZSCO Description_y | ANZSCO Description_y | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | anzsco 6 | 394211 | furniture finisher | Grind materials, parts, or items | Operate production equipment and make products | Production processes and machinery | Set up, adjust, and operate grinding tools or ... | Digital engagement | 4.0 | Intermediate | ... | [set adjust, adjust operate, operate grinding,... | [set adjust operate, adjust operate grinding, ... | 52 | [] | set , adjust , operate grind tool equipment ma... | VERB ADP PUNCT VERB PUNCT CCONJ VERB VERB NOUN... | 0.222857 | NaN | NaN | NaN |
| 1 | anzsco 6 | 394211 | furniture finisher | Fill cracks, imperfections or holes in product... | Repair parts or components | Production processes and machinery | Apply fillers, sealants, compounds, or adhesiv... | Digital engagement | 4.0 | Intermediate | ... | [apply fillers, fillers sealants, sealants com... | [apply fillers sealants, fillers sealants comp... | 29 | [] | apply filler , sealant , compound , adhesive o... | VERB NOUN PUNCT NOUN PUNCT NOUN PUNCT CCONJ NO... | 0.400000 | NaN | NaN | NaN |
| 2 | anzsco 6 | 394211 | furniture finisher | Operate spraying, coating, or painting equipment | Apply paint or finishes | Construction | Operate spraying, coating, or painting equipme... | Digital engagement | 4.0 | Intermediate | ... | [operate spraying, spraying coating, coating p... | [operate spraying coating, spraying coating pa... | 67 | [] | operate spray , coating , paint equipment ( sp... | VERB VERB PUNCT NOUN PUNCT CCONJ VERB NOUN PUN... | 0.294444 | NaN | NaN | NaN |
| 3 | anzsco 6 | 394211 | furniture finisher | Remove accessories, tools, components or other... | Remove or dismantle objects and equipment | Material transportation | Manually or with the assistance of tools or eq... | Digital engagement | 4.0 | Intermediate | ... | [manually assistance, assistance tools, tools ... | [manually assistance tools, assistance tools e... | 22 | [] | manually assistance tool equipment , remove ac... | ADV CCONJ ADP DET NOUN ADP NOUN CCONJ NOUN PUN... | 0.366667 | NaN | NaN | NaN |
| 4 | anzsco 6 | 394211 | furniture finisher | Confer with clients, customers, or designers t... | Communicate with others to coordinate work | Communication and collaboration | Have discussions with customers, clients, or d... | Digital engagement | 4.0 | Intermediate | ... | [discussions customers, customers clients, cli... | [discussions customers clients, customers clie... | 40 | [] | discussion customer , client , designer determ... | VERB NOUN ADP NOUN PUNCT NOUN PUNCT CCONJ NOUN... | 0.180000 | NaN | NaN | NaN |
5 rows × 28 columns
In [103]:
try:
merged_df = merged_df.drop(columns=['ANZSCO Description_y', 'ANZSCO Description'])
except KeyError as e:
print(f"Column not found: {e}")
Column not found: "['ANZSCO Description'] not found in axis"
In [104]:
for col in merged_df.columns:
empty_count = merged_df[col].isnull().sum()
print(f"Column '{col}': {empty_count} empty values")
Column 'Occupation Type': 0 empty values Column 'ANZSCO Code': 0 empty values Column 'ANZSCO Title': 0 empty values Column 'Specialist Task': 0 empty values Column 'Specialist Cluster': 0 empty values Column 'Cluster Family': 0 empty values Column 'Skills Statement': 0 empty values Column 'Core Competency': 0 empty values Column 'Score': 0 empty values Column 'Proficiency Level': 0 empty values Column 'Anchor Value': 0 empty values Column 'Technology Tool': 0 empty values Column 'ANZSCO Description_x': 23882 empty values Column 'Technology Tool Description': 0 empty values Column 'Technology Tool Extended Description': 0 empty values Column 'Technology Tool Category': 0 empty values Column 'Technology Tool Category Description': 0 empty values Column 'Processed_Skills_Manual': 0 empty values Column 'bigrams': 0 empty values Column 'trigrams': 0 empty values Column 'description_length': 0 empty values Column 'ner': 0 empty values Column 'lemmatized': 0 empty values Column 'pos_tags': 0 empty values Column 'sentiment': 0 empty values Column 'sentiment_polarity': 23882 empty values Column 'ANZSCO Description_y': 23882 empty values
In [105]:
try:
merged_df = merged_df.rename(columns={'ANZSCO Description_x': 'ANZSCO Description'})
except KeyError as e:
print(f"Column not found: {e}")
In [106]:
merged_df.head()
Out[106]:
| Occupation Type | ANZSCO Code | ANZSCO Title | Specialist Task | Specialist Cluster | Cluster Family | Skills Statement | Core Competency | Score | Proficiency Level | ... | Processed_Skills_Manual | bigrams | trigrams | description_length | ner | lemmatized | pos_tags | sentiment | sentiment_polarity | ANZSCO Description_y | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | anzsco 6 | 394211 | furniture finisher | Grind materials, parts, or items | Operate production equipment and make products | Production processes and machinery | Set up, adjust, and operate grinding tools or ... | Digital engagement | 4.0 | Intermediate | ... | set adjust operate grinding tools equipment ma... | [set adjust, adjust operate, operate grinding,... | [set adjust operate, adjust operate grinding, ... | 52 | [] | set , adjust , operate grind tool equipment ma... | VERB ADP PUNCT VERB PUNCT CCONJ VERB VERB NOUN... | 0.222857 | NaN | NaN |
| 1 | anzsco 6 | 394211 | furniture finisher | Fill cracks, imperfections or holes in product... | Repair parts or components | Production processes and machinery | Apply fillers, sealants, compounds, or adhesiv... | Digital engagement | 4.0 | Intermediate | ... | apply fillers sealants compounds adhesives ord... | [apply fillers, fillers sealants, sealants com... | [apply fillers sealants, fillers sealants comp... | 29 | [] | apply filler , sealant , compound , adhesive o... | VERB NOUN PUNCT NOUN PUNCT NOUN PUNCT CCONJ NO... | 0.400000 | NaN | NaN |
| 2 | anzsco 6 | 394211 | furniture finisher | Operate spraying, coating, or painting equipment | Apply paint or finishes | Construction | Operate spraying, coating, or painting equipme... | Digital engagement | 4.0 | Intermediate | ... | operate spraying coating painting equipment sp... | [operate spraying, spraying coating, coating p... | [operate spraying coating, spraying coating pa... | 67 | [] | operate spray , coating , paint equipment ( sp... | VERB VERB PUNCT NOUN PUNCT CCONJ VERB NOUN PUN... | 0.294444 | NaN | NaN |
| 3 | anzsco 6 | 394211 | furniture finisher | Remove accessories, tools, components or other... | Remove or dismantle objects and equipment | Material transportation | Manually or with the assistance of tools or eq... | Digital engagement | 4.0 | Intermediate | ... | manually assistance tools equipment remove acc... | [manually assistance, assistance tools, tools ... | [manually assistance tools, assistance tools e... | 22 | [] | manually assistance tool equipment , remove ac... | ADV CCONJ ADP DET NOUN ADP NOUN CCONJ NOUN PUN... | 0.366667 | NaN | NaN |
| 4 | anzsco 6 | 394211 | furniture finisher | Confer with clients, customers, or designers t... | Communicate with others to coordinate work | Communication and collaboration | Have discussions with customers, clients, or d... | Digital engagement | 4.0 | Intermediate | ... | discussions customers clients designers determ... | [discussions customers, customers clients, cli... | [discussions customers clients, customers clie... | 40 | [] | discussion customer , client , designer determ... | VERB NOUN ADP NOUN PUNCT NOUN PUNCT CCONJ NOUN... | 0.180000 | NaN | NaN |
5 rows × 27 columns
In [46]:
merged_df.shape
Out[46]:
(23882, 13)
In [47]:
tech_tools_hierarchy_df.head()
Out[47]:
| Technology Tool Category | Technology Tool Category Description | Technology Tool | Technology Tool Description | Technology Tool Extended Description | |
|---|---|---|---|---|---|
| 0 | Broadcasting and audio-visual production techn... | Systems for audio, video, multimedia and news ... | Collaborative news production platforms | Software used to create and manage news conten... | Collaborative news production platforms encomp... |
| 1 | Broadcasting and audio-visual production techn... | Systems for audio, video, multimedia and news ... | Music or sound editing software | Software used to create, manipulate, and edit ... | Music and sound editing software incorporates ... |
| 2 | Broadcasting and audio-visual production techn... | Systems for audio, video, multimedia and news ... | Sound and audio hardware | Apparatus used to create, manipulate, mix and/... | Hardware and equipment used to generate electr... |
| 3 | Broadcasting and audio-visual production techn... | Systems for audio, video, multimedia and news ... | Video creation and editing software | Software used to create and edit digital video... | Video creation and editing software enables th... |
| 4 | Communication technologies | Data, voice and/or video communication platforms | Audio/video conferencing software | Software for collaboration using video or audi... | Audio/video conferencing software enables peop... |
In [48]:
nan_count = merged_df['Technology Tool'].isnull().sum()
print(f"Number of NaN values in 'Technology Tool' column: {nan_count}")
Number of NaN values in 'Technology Tool' column: 0
In [49]:
tech_tools_hierarchy_df.columns.tolist()
Out[49]:
['Technology Tool Category', 'Technology Tool Category Description', 'Technology Tool', 'Technology Tool Description', 'Technology Tool Extended Description']
In [50]:
# Step 1: Ensure both dataframes are sorted for sequential traversal
merged_df = merged_df.sort_values(by=['Technology Tool']).reset_index(drop=True)
tech_tools_hierarchy_df = tech_tools_hierarchy_df.sort_values(by=['Technology Tool']).reset_index(drop=True)
# Step 2: Initialize index for tracking tech_tools_hierarchy_df
tech_index = 0
# Step 3: Iterate through merged_df and match 'Technology Tool' with tech_tools_hierarchy_df
for i in range(len(merged_df)):
while tech_index < len(tech_tools_hierarchy_df) and merged_df.loc[i, 'Technology Tool'] == tech_tools_hierarchy_df.loc[tech_index, 'Technology Tool']:
# Assign values from tech_tools_hierarchy_df to merged_df
merged_df.at[i, 'Technology Tool Description'] = tech_tools_hierarchy_df.loc[tech_index, 'Technology Tool Description']
merged_df.at[i, 'Technology Tool Extended Description'] = tech_tools_hierarchy_df.loc[tech_index, 'Technology Tool Extended Description']
merged_df.at[i, 'Technology Tool Category'] = tech_tools_hierarchy_df.loc[tech_index, 'Technology Tool Category']
merged_df.at[i, 'Technology Tool Category Description'] = tech_tools_hierarchy_df.loc[tech_index, 'Technology Tool Category Description']
tech_index += 1 # Move to the next entry in tech_tools_hierarchy_df
# Reset tech_index when end is reached
if tech_index >= len(tech_tools_hierarchy_df):
tech_index = 0
# Step 4: Fill remaining missing values with "Not Available" to ensure no NaN values
columns_to_fill = ['Technology Tool Description', 'Technology Tool Extended Description',
'Technology Tool Category', 'Technology Tool Category Description']
merged_df[columns_to_fill] = merged_df[columns_to_fill].fillna("Not Available")
merged_df.shape
Out[50]:
(23882, 17)
In [51]:
merged_df.columns.tolist()
Out[51]:
['Occupation Type', 'ANZSCO Code', 'ANZSCO Title', 'Specialist Task', 'Specialist Cluster', 'Cluster Family', 'Skills Statement', 'Core Competency', 'Score', 'Proficiency Level', 'Anchor Value', 'Technology Tool', 'ANZSCO Description', 'Technology Tool Description', 'Technology Tool Extended Description', 'Technology Tool Category', 'Technology Tool Category Description']
In [52]:
merged_df.shape
Out[52]:
(23882, 17)
In [53]:
merged_df.head()
Out[53]:
| Occupation Type | ANZSCO Code | ANZSCO Title | Specialist Task | Specialist Cluster | Cluster Family | Skills Statement | Core Competency | Score | Proficiency Level | Anchor Value | Technology Tool | ANZSCO Description | Technology Tool Description | Technology Tool Extended Description | Technology Tool Category | Technology Tool Category Description | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | anzsco 6 | 394211 | furniture finisher | Grind materials, parts, or items | Operate production equipment and make products | Production processes and machinery | Set up, adjust, and operate grinding tools or ... | Digital engagement | 4.0 | Intermediate | Recognise different ways to connect to the int... | Accounting and financial management systems | NaN | Software for managing accounts, inventory, and... | Accounting and financial management systems en... | Financial management and service delivery plat... | Systems to undertake financial management and ... |
| 1 | anzsco 6 | 394211 | furniture finisher | Fill cracks, imperfections or holes in product... | Repair parts or components | Production processes and machinery | Apply fillers, sealants, compounds, or adhesiv... | Digital engagement | 4.0 | Intermediate | Recognise different ways to connect to the int... | Accounting and financial management systems | NaN | Not Available | Not Available | Not Available | Not Available |
| 2 | anzsco 6 | 394211 | furniture finisher | Operate spraying, coating, or painting equipment | Apply paint or finishes | Construction | Operate spraying, coating, or painting equipme... | Digital engagement | 4.0 | Intermediate | Recognise different ways to connect to the int... | Accounting and financial management systems | NaN | Not Available | Not Available | Not Available | Not Available |
| 3 | anzsco 6 | 394211 | furniture finisher | Remove accessories, tools, components or other... | Remove or dismantle objects and equipment | Material transportation | Manually or with the assistance of tools or eq... | Digital engagement | 4.0 | Intermediate | Recognise different ways to connect to the int... | Accounting and financial management systems | NaN | Not Available | Not Available | Not Available | Not Available |
| 4 | anzsco 6 | 394211 | furniture finisher | Confer with clients, customers, or designers t... | Communicate with others to coordinate work | Communication and collaboration | Have discussions with customers, clients, or d... | Digital engagement | 4.0 | Intermediate | Recognise different ways to connect to the int... | Accounting and financial management systems | NaN | Not Available | Not Available | Not Available | Not Available |
In [54]:
# Calculate the ratio of missing values for specific columns
columns_to_check = ['Technology Tool Category', 'Technology Tool Category Description', 'Technology Tool Description', 'Technology Tool Extended Description']
total_rows = len(merged_df)
for column in columns_to_check:
missing_count = merged_df[column].isnull().sum()
missing_ratio = missing_count / total_rows
print(f"Ratio of missing values in '{column}': {missing_ratio:.4f}")
Ratio of missing values in 'Technology Tool Category': 0.0000 Ratio of missing values in 'Technology Tool Category Description': 0.0000 Ratio of missing values in 'Technology Tool Description': 0.0000 Ratio of missing values in 'Technology Tool Extended Description': 0.0000
In [55]:
# Assuming 'merged_df' is the DataFrame you want to export
merged_df.to_csv('merged_data.csv', index=False) # Set index=False to avoid saving row indices
EDA¶
In [108]:
import pandas as pd
url = "https://raw.githubusercontent.com/saifrahmania/Data36118/refs/heads/main/Assignment1/Data/merged_data.csv"
data = pd.read_csv(url)
# Display dataset shape
print("Dataset Shape:", data.shape)
# Check for missing values
print("Missing Values:\n", data.isnull().sum())
# Inspect the first few rows
print("First Few Rows:\n", data.head())
Dataset Shape: (23882, 17)
Missing Values:
Occupation Type 0
ANZSCO Code 0
ANZSCO Title 0
Specialist Task 0
Specialist Cluster 0
Cluster Family 0
Skills Statement 0
Core Competency 0
Score 0
Proficiency Level 0
Anchor Value 0
ANZSCO Description 0
Technology Tool 0
Technology Tool Category 0
Technology Tool Category Description 0
Technology Tool Description 0
Technology Tool Extended Description 0
dtype: int64
First Few Rows:
Occupation Type ANZSCO Code ANZSCO Title \
0 anzsco 6 394211 furniture finisher
1 anzsco 6 394211 furniture finisher
2 anzsco 6 394211 furniture finisher
3 anzsco 6 394211 furniture finisher
4 anzsco 6 394211 furniture finisher
Specialist Task \
0 Grind materials, parts, or items
1 Select production input materials
2 Treat timber
3 Shape surfaces or edges of wood work pieces
4 Repair furniture or upholstery
Specialist Cluster \
0 Operate production equipment and make products
1 Manage construction or production projects
2 Undertake woodworking or carpentry
3 Undertake woodworking or carpentry
4 Repair parts or components
Cluster Family \
0 Production processes and machinery
1 Work activities preparation
2 Construction
3 Construction
4 Production processes and machinery
Skills Statement Core Competency \
0 Set up, adjust, and operate grinding tools or ... Digital engagement
1 Select appropriate production input materials ... Digital engagement
2 Treat timber in order to protect it from deter... Digital engagement
3 Form specific shapes, patterns, textures or ot... Digital engagement
4 Return functionality or desired appearance to ... Digital engagement
Score Proficiency Level Anchor Value \
0 4.0 Intermediate Recognise different ways to connect to the int...
1 4.0 Intermediate Recognise different ways to connect to the int...
2 4.0 Intermediate Recognise different ways to connect to the int...
3 4.0 Intermediate Recognise different ways to connect to the int...
4 4.0 Intermediate Recognise different ways to connect to the int...
ANZSCO Description \
0 Applies finishes, such as stain, lacquer, pain...
1 Applies finishes, such as stain, lacquer, pain...
2 Applies finishes, such as stain, lacquer, pain...
3 Applies finishes, such as stain, lacquer, pain...
4 Applies finishes, such as stain, lacquer, pain...
Technology Tool \
0 Accounting and financial management systems
1 Accounting and financial management systems
2 Accounting and financial management systems
3 Accounting and financial management systems
4 Accounting and financial management systems
Technology Tool Category \
0 Financial management and service delivery plat...
1 Financial management and service delivery plat...
2 Financial management and service delivery plat...
3 Financial management and service delivery plat...
4 Financial management and service delivery plat...
Technology Tool Category Description \
0 Systems to undertake financial management and ...
1 Systems to undertake financial management and ...
2 Systems to undertake financial management and ...
3 Systems to undertake financial management and ...
4 Systems to undertake financial management and ...
Technology Tool Description \
0 Software for managing accounts, inventory, and...
1 Software for managing accounts, inventory, and...
2 Software for managing accounts, inventory, and...
3 Software for managing accounts, inventory, and...
4 Software for managing accounts, inventory, and...
Technology Tool Extended Description
0 Accounting and financial management systems en...
1 Accounting and financial management systems en...
2 Accounting and financial management systems en...
3 Accounting and financial management systems en...
4 Accounting and financial management systems en...
In [109]:
# Step 1.1: Load and Inspect Data
# Already loaded and inspected, so I will define the DataFrame variable name as 'df' to be used throughout the analysis.
df = data
# Step 1.2: Descriptive Statistics
# Count the number of unique skills, job roles (using ANZSCO Title), and industries (using Occupation Type).
unique_skills = df['Skills Statement'].nunique()
unique_job_roles = df['ANZSCO Title'].nunique()
unique_industries = df['Occupation Type'].nunique()
# Analyze the distribution of skills across different categories (Specialist Cluster).
skills_distribution = df['Specialist Cluster'].value_counts()
# Check the frequency of unique job titles and their variations.
job_titles_frequency = df['ANZSCO Title'].value_counts()
# Step 1.3: Handling Missing Data
# No missing data found from initial inspection. Confirming if any columns have missing data and need handling.
missing_data_check = df.isnull().any()
# Display results from Descriptive Statistics and Missing Data Check
unique_skills, unique_job_roles, unique_industries, skills_distribution.head(), job_titles_frequency.head(), missing_data_check
Out[109]:
(1653, 1101, 2, Specialist Cluster Provide customer service and communicate information 490 Undertake or provide professional skill and knowledge development 444 Inspect, test or maintain equipment or systems 435 Communicate or collaborate with others 413 Provide health care or administer medical treatment 412 Name: count, dtype: int64, ANZSCO Title web administrator 45 systems administrator 45 dance teacher (private tuition) 44 primary school teacher 43 primary school teachers 43 Name: count, dtype: int64, Occupation Type False ANZSCO Code False ANZSCO Title False Specialist Task False Specialist Cluster False Cluster Family False Skills Statement False Core Competency False Score False Proficiency Level False Anchor Value False ANZSCO Description False Technology Tool False Technology Tool Category False Technology Tool Category Description False Technology Tool Description False Technology Tool Extended Description False dtype: bool)
In [110]:
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import CountVectorizer
# Step 2.1: Word Frequency Analysis
# Define a basic set of English stopwords manually as a fallback
manual_stopwords = {'and', 'the', 'to', 'of', 'a', 'in', 'for', 'on', 'with', 'as', 'by', 'is', 'that', 'it', 'from', 'this', 'be', 'which', 'or', 'are', 'was', 'will', 'at', 'an', 'have', 'not', 'their', 'has', 'can', 'all', 'any', 'if', 'but', 'they', 'you', 'your', 'one', 'what', 'some', 'other', 'such', 'into', 'do', 'also', 'use', 'used', 'using', 'up', 'out', 'about', 'than', 'more', 'its', 'each', 'when', 'how', 'like', 'these', 'just', 'his', 'her', 'over', 'who', 'them', 'get', 'may', 'new', 'would'}
# Update the text preprocessing function using the manual list of stopwords
def preprocess_text_manual(text):
text = text.lower()
text = ''.join([char for char in text if char.isalnum() or char.isspace()])
tokens = [word for word in text.split() if word not in manual_stopwords]
return ' '.join(tokens)
# Apply the updated text preprocessing
df['Processed_Skills_Manual'] = df['Skills Statement'].apply(preprocess_text_manual)
# Import CountVectorizer from sklearn
from sklearn.feature_extraction.text import CountVectorizer
# Count the most frequently used words again
vectorizer_manual = CountVectorizer()
word_matrix_manual = vectorizer_manual.fit_transform(df['Processed_Skills_Manual'])
word_counts_manual = word_matrix_manual.sum(axis=0)
words_freq_manual = [(word, word_counts_manual[0, idx]) for word, idx in vectorizer_manual.vocabulary_.items()]
words_freq_manual = sorted(words_freq_manual, key=lambda x: x[1], reverse=True)[:20]
# Displaying the words and their frequencies
print("Top 20 Most Frequent Words and Their Frequencies:")
for word, freq in words_freq_manual:
print(f"{word}: {freq}")
# Create bar charts again with the updated data
words_manual, counts_manual = zip(*words_freq_manual)
plt.figure(figsize=(10, 8))
plt.bar(words_manual, counts_manual, color='blue')
plt.xlabel('Words')
plt.ylabel('Frequency')
plt.title('Top 20 Most Frequent Words in Skills Descriptions (Manual Stopwords)')
plt.xticks(rotation=90)
plt.show()
Top 20 Most Frequent Words and Their Frequencies: ensure: 12778 equipment: 12227 relevant: 10932 information: 10188 order: 10023 work: 9310 requirements: 8498 include: 8473 safety: 7701 standards: 7494 procedures: 7070 involve: 5776 materials: 5392 activities: 5365 appropriate: 5354 data: 5328 regulations: 5322 technical: 4901 issues: 4748 health: 4207
In [111]:
# Step 2.2: N-Grams Analysis (Phrase Extraction)
# Using the preprocessed text to generate bigrams and trigrams
# Function to generate n-grams
def generate_ngrams(text, n=2):
words = text.split()
ngrams = zip(*[words[i:] for i in range(n)])
return [' '.join(ngram) for ngram in ngrams]
# Apply the function to generate bigrams and trigrams
df['bigrams'] = df['Processed_Skills_Manual'].apply(generate_ngrams, n=2)
df['trigrams'] = df['Processed_Skills_Manual'].apply(generate_ngrams, n=3)
# Flatten the list of bigrams and trigrams to count frequency
all_bigrams = [bigram for sublist in df['bigrams'].tolist() for bigram in sublist]
all_trigrams = [trigram for sublist in df['trigrams'].tolist() for trigram in sublist]
# Count frequency of bigrams and trigrams
bigram_freq = pd.Series(all_bigrams).value_counts().head(20)
trigram_freq = pd.Series(all_trigrams).value_counts().head(20)
print("Top 20 Most Frequent Bigrams:")
print(bigram_freq)
print("\nTop 20 Most Frequent Trigrams:")
print(trigram_freq)
# Plot the top 20 bigrams
plt.figure(figsize=(10, 8))
bigram_freq.plot(kind='bar', color='green')
plt.title('Top 20 Most Frequent Bigrams in Skills Descriptions')
plt.xlabel('Bigrams')
plt.ylabel('Frequency')
plt.xticks(rotation=90)
plt.show()
# Plot the top 20 trigrams
plt.figure(figsize=(10, 8))
trigram_freq.plot(kind='bar', color='red')
plt.title('Top 20 Most Frequent Trigrams in Skills Descriptions')
plt.xlabel('Trigrams')
plt.ylabel('Frequency')
plt.xticks(rotation=90)
plt.show()
Top 20 Most Frequent Bigrams: health safety 1917 relevant information 1557 order ensure 1410 work activities 1169 technical knowledge 1141 work health 1090 policies procedures 1086 specialist technical 1074 include providing 1016 regulations standards 975 safety standards 972 order determine 969 adhere relevant 946 select appropriate 940 relevant regulations 918 protective equipment 852 ensure safety 844 industry standards 843 personal protective 812 relevant standards 812 Name: count, dtype: int64 Top 20 Most Frequent Trigrams: work health safety 1090 specialist technical knowledge 874 personal protective equipment 749 project management tasks 623 providing specialist technical 592 technical knowledge guidance 576 general project management 565 undertaking general project 565 include providing specialist 557 procedures further action 551 further action reporting 529 health safety standards 505 adhere relevant regulations 495 relevant regulations standards 492 information security privacy 467 follow operational procedures 451 materials resources equipment 439 supervision guidance direction 423 staff resource allocation 423 resource allocation providing 423 Name: count, dtype: int64
In [112]:
!pip install wordcloud
Requirement already satisfied: wordcloud in /usr/local/lib/python3.11/dist-packages (1.9.4) Requirement already satisfied: numpy>=1.6.1 in /usr/local/lib/python3.11/dist-packages (from wordcloud) (1.26.4) Requirement already satisfied: pillow in /usr/local/lib/python3.11/dist-packages (from wordcloud) (11.1.0) Requirement already satisfied: matplotlib in /usr/local/lib/python3.11/dist-packages (from wordcloud) (3.10.0) Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->wordcloud) (1.3.1) Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.11/dist-packages (from matplotlib->wordcloud) (0.12.1) Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib->wordcloud) (4.56.0) Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->wordcloud) (1.4.8) Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib->wordcloud) (24.2) Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->wordcloud) (3.2.1) Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.11/dist-packages (from matplotlib->wordcloud) (2.9.0.post0) Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.7->matplotlib->wordcloud) (1.17.0)
In [113]:
# Step 2.3: Word Cloud Visualization
from wordcloud import WordCloud
# Function to generate a word cloud
def generate_word_cloud(text_series):
text_combined = ' '.join(text_series)
wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text_combined)
plt.figure(figsize=(10, 5))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()
# Generate word cloud for all skills descriptions
generate_word_cloud(df['Processed_Skills_Manual'])
In [114]:
# Step 2.4: Text Length Distribution
# Calculate the length of each skill description in words
df['description_length'] = df['Processed_Skills_Manual'].apply(lambda x: len(x.split()))
# Plot the histogram of description lengths
plt.figure(figsize=(10, 5))
plt.hist(df['description_length'], bins=30, color='purple', edgecolor='black')
plt.title('Distribution of Skills Description Lengths')
plt.xlabel('Length of Description (words)')
plt.ylabel('Frequency')
plt.show()
# Additionally, use a box plot to see the variation and outliers
plt.figure(figsize=(10, 5))
plt.boxplot(df['description_length'], vert=False)
plt.title('Box Plot of Skills Description Lengths')
plt.xlabel('Length of Description (words)')
plt.show()
In [115]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import LatentDirichletAllocation
import numpy as np
# Step 3.1: TF-IDF Vectorization
# Limiting to top 1000 features for simplicity and using float32 for memory efficiency
tfidf_vectorizer = TfidfVectorizer(max_features=1000, dtype=np.float32)
tfidf_matrix = tfidf_vectorizer.fit_transform(df['Processed_Skills_Manual'])
# Get feature names to use as dataframe columns
tfidf_feature_names = tfidf_vectorizer.get_feature_names_out()
# Step 3.2: Cosine Similarity (Skill Matching)
# Measure similarity between skills using cosine similarity
# Computing cosine similarity in chunks to manage memory usage
def chunked_cosine_similarity(matrix, chunk_size=1000):
cosine_sim = []
for i in range(0, matrix.shape[0], chunk_size):
cosine_sim.append(cosine_similarity(matrix[i:i+chunk_size], matrix))
return np.vstack(cosine_sim)
cosine_sim_matrix = chunked_cosine_similarity(tfidf_matrix)
# Display part of the cosine similarity matrix
print(cosine_sim_matrix[:5, :5]) # Show the top-left corner of the matrix for brevity
# Step 3.3: Topic Modeling using Latent Dirichlet Allocation (LDA)
# Use LDA with batch_size for memory management
lda = LatentDirichletAllocation(n_components=5, random_state=0, learning_method='batch', batch_size=128)
lda.fit(tfidf_matrix)
# Function to display topics and their top words
def display_topics(model, feature_names, no_top_words):
for topic_idx, topic in enumerate(model.components_):
print("Topic %d:" % (topic_idx + 1))
print(" ".join([feature_names[i] for i in topic.argsort()[:-no_top_words - 1:-1]]))
# Display the topics
display_topics(lda, tfidf_feature_names, 10)
[[1. 0.09233322 0.14015481 0.2647536 0.140082 ] [0.09233322 1. 0.09939328 0.03324211 0. ] [0.14015481 0.09939328 1. 0.0833383 0.03711684] [0.2647536 0.03324211 0.0833383 0.99999994 0.33450142] [0.140082 0. 0.03711684 0.33450142 1. ]] Topic 1: activities staff provide support providing ensure needs skills work learning Topic 2: order data materials information requirements equipment products determine analysis standards Topic 3: information relevant medical patient procedures equipment regulations ensure issues standards Topic 4: equipment work tools requirements ensure specifications order appropriate safety design Topic 5: equipment include materials data financial items business relevant products resources
In [116]:
import matplotlib.pyplot as plt
import numpy as np
# Assuming 'tfidf_matrix' and 'tfidf_feature_names' are defined
# Sum the TF-IDF scores for each term across all documents and ensure it's a flat array
sum_tfidf = np.sum(tfidf_matrix, axis=0).A1 # Use .A1 to flatten the matrix to a 1D array if it's a scipy sparse matrix
words = np.array(tfidf_feature_names) # Feature names (words)
sorted_indices = np.argsort(sum_tfidf)[::-1] # Sort by score
sorted_words = words[sorted_indices]
sorted_scores = sum_tfidf[sorted_indices]
# Option 1: Line Plot
plt.figure(figsize=(14, 7))
plt.plot(sorted_scores, marker='o', linestyle='-', color='b')
plt.title('TF-IDF Scores Distribution')
plt.ylabel('Summed TF-IDF Scores')
plt.xlabel('Word Index')
plt.grid(True)
plt.show()
# Option 2: Scatter Plot
plt.figure(figsize=(12, 8))
plt.scatter(range(len(sorted_words)), sorted_scores, color='blue')
top_n = 20 # Number of top words to annotate
for i in range(top_n):
plt.annotate(sorted_words[i], (i, sorted_scores[i]), textcoords="offset points", xytext=(0,10), ha='center')
plt.title('Scatter Plot of TF-IDF Scores')
plt.xlabel('Words Index')
plt.ylabel('Summed TF-IDF Scores')
plt.show()
In [117]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# Assuming 'cosine_sim_matrix' is already computed
# Define a smaller subset size for a more focused view
subset_size = 5 # Set to 5 for a 5x5 matrix
num_docs = cosine_sim_matrix.shape[0]
if num_docs > subset_size:
# Choose a random subset of indices to reduce the dataset size
np.random.seed(0) # For reproducibility
indices = np.random.choice(num_docs, subset_size, replace=False)
sim_subset = cosine_sim_matrix[indices, :][:, indices]
else:
sim_subset = cosine_sim_matrix # Use the entire matrix if it's smaller than the subset size
# Plot the subset of the cosine similarity matrix
plt.figure(figsize=(8, 6))
sns.heatmap(sim_subset, annot=True, cmap='viridis', square=True) # `annot=True` to show the similarity scores
plt.title('5x5 Cosine Similarity Matrix')
plt.xlabel('Document Index')
plt.ylabel('Document Index')
plt.show()
In [118]:
import matplotlib.pyplot as plt
import numpy as np
def plot_top_words(model, feature_names, n_top_words, title):
fig, axes = plt.subplots(2, 3, figsize=(30, 15), sharex=True)
axes = axes.flatten()
for topic_idx, topic in enumerate(model.components_):
top_features_ind = topic.argsort()[:-n_top_words - 1:-1]
top_features = [feature_names[i] for i in top_features_ind]
weights = topic[top_features_ind]
ax = axes[topic_idx]
ax.barh(top_features, weights, height=0.7)
ax.set_title(f'Topic {topic_idx +1}',
fontdict={'fontsize': 30})
ax.invert_yaxis()
ax.tick_params(axis='both', which='major', labelsize=20)
for i in 'top right left'.split():
ax.spines[i].set_visible(False)
fig.suptitle(title, fontsize=40)
plt.subplots_adjust(top=0.90, bottom=0.05, wspace=0.90, hspace=0.3)
plt.show()
# Assuming 'lda', 'tfidf_feature_names' are defined from your code
plot_top_words(lda, tfidf_feature_names, 10, 'Topics in LDA Model')
In [119]:
import matplotlib.pyplot as plt
# Group by 'Occupation Type' and count entries in each category
occupation_grouped = df['Occupation Type'].value_counts()
# Plotting the distribution of Occupation Types
plt.figure(figsize=(12, 6))
occupation_grouped.plot(kind='bar', color='skyblue')
plt.title('Distribution of Occupation Types')
plt.xlabel('Occupation Type')
plt.ylabel('Number of Entries')
plt.xticks(rotation=45, ha='right')
plt.show()
In [120]:
# Group skills by 'Occupation Type' and aggregate the unique skills in each category
industry_skills = df.groupby('Occupation Type')['Skills Statement'].nunique()
# Visualizing the number of unique skills per industry
plt.figure(figsize=(12, 6))
industry_skills.plot(kind='bar', color='lightgreen')
plt.title('Number of Unique Skills per Industry')
plt.xlabel('Industry')
plt.ylabel('Unique Skills Count')
plt.xticks(rotation=45, ha='right')
plt.show()
# Identify overlapping and unique skills across industries by creating sets of skills for each industry and comparing them
skills_sets = df.groupby('Occupation Type')['Skills Statement'].apply(set).to_dict()
# Example of comparing skills between two industries (replace 'Industry1' and 'Industry2' with actual industry names)
# This is assuming there are multiple industries, replace the keys with actual ones from your data
industries = list(skills_sets.keys())
if len(industries) > 1:
common_skills = skills_sets[industries[0]].intersection(skills_sets[industries[1]])
unique_skills = skills_sets[industries[0]].symmetric_difference(skills_sets[industries[1]])
print(f"Common Skills in {industries[0]} and {industries[1]}: {len(common_skills)}")
print(f"Unique Skills in {industries[0]} and {industries[1]}: {len(unique_skills)}")
Common Skills in anzsco 4 and anzsco 6: 1324 Unique Skills in anzsco 4 and anzsco 6: 329
In [121]:
import spacy
from spacy import displacy
# Load the English NLP model from spaCy
nlp = spacy.load("en_core_web_sm")
# Function to apply NER and extract entities
def extract_entities(text):
doc = nlp(text)
return [(ent.text, ent.label_) for ent in doc.ents]
# Apply the function to the 'Skills Statement' column
df['ner'] = df['Skills Statement'].apply(extract_entities)
# Print the first few entries with non-empty NER results
for index, ner_entries in enumerate(df['ner']):
if ner_entries: # Only display non-empty NER results
print(f"Index {index}: {ner_entries}")
if index > 20: # Limit output to first 20 non-empty results
break
Index 1: [('Evaluate', 'ORG')]
Index 16: [('Identify', 'ORG')]
Index 20: [('Java', 'PERSON')]
In [122]:
sample_text = df['Skills Statement'][10] # Assuming the second entry has text suitable for demonstration
doc = nlp(sample_text)
displacy.render(doc, style='ent', jupyter=True)
/usr/local/lib/python3.11/dist-packages/spacy/displacy/__init__.py:213: UserWarning: [W006] No entities to visualize found in Doc object. If this is surprising to you, make sure the Doc was processed using a model that supports named entity recognition, and check the `doc.ents` property manually if necessary. warnings.warn(Warnings.W006)
Operate spraying, coating, or painting equipment (such as spray guns, paint mixing systems or air pressure regulators) in order to apply a decorative, protective, or functional coating to items, objects, buildings, structures, vehicles, or vegetation. Review work instructions, manufacturer instructions, material requirements and other relevant information to select appropriate equipment, solutions, and techniques. This may involve running tests or checks on equipment to ensure function and detect problems, using appropriate personal protective equipment, maintaining, and cleaning equipment, adjusting settings to ensure proper flow and coverage, and replenishing or disposing of excess or faulty solutions or materials.
In [123]:
import spacy
import pandas as pd
# Load the spaCy model
nlp = spacy.load("en_core_web_sm")
# Define the batch size and the total number of records to process
batch_size = 100
total_records = 5000 # process only the first 5000 records
# Function to process text in batches and generate dependency trees
def process_text_in_batches(data, batch_size):
for start_idx in range(0, total_records, batch_size):
end_idx = start_idx + batch_size
# Ensure not to exceed the total number of records
if end_idx > total_records:
end_idx = total_records
batch = data[start_idx:end_idx]
# Process each document in the batch
for doc in nlp.pipe(batch, disable=["ner", "lemmatizer"]): # Disable unnecessary pipeline components
for sent in doc.sents:
tree = [(token.text, token.dep_, token.head.text) for token in sent]
print("Sentence:", sent.text)
for t in tree:
print(f"{t[0]} ({t[1]} -> {t[2]})")
print("\n----------\n") # Separate sentences for readability
# Slice the DataFrame to get only the required column and rows
data_slice = df['Skills Statement'].iloc[:total_records]
# Call the function with the sliced data
process_text_in_batches(data_slice, batch_size)
Streaming output truncated to the last 5000 lines.
order (pobj -> in)
to (aux -> design)
design (acl -> order)
and (cc -> design)
create (conj -> design)
a (det -> mural)
mural (dobj -> create)
that (nsubj -> meets)
meets (relcl -> mural)
the (det -> café)
café (poss -> needs)
’s (case -> café)
needs (dobj -> meets)
. (punct -> research)
----------
Sentence: Develop the creative vision or direction for a project, and/or concepts that align with it - for example, interfaces for multimedia products; interior environments; the look of product lines; displays or exhibits.
Develop (ROOT -> Develop)
the (det -> vision)
creative (amod -> vision)
vision (dobj -> Develop)
or (cc -> vision)
direction (conj -> vision)
for (prep -> Develop)
a (det -> project)
project (pobj -> for)
, (punct -> Develop)
and/or (cc -> Develop)
concepts (conj -> Develop)
that (nsubj -> align)
align (relcl -> concepts)
with (prep -> align)
it (pobj -> with)
- (punct -> concepts)
for (prep -> concepts)
example (pobj -> for)
, (punct -> concepts)
interfaces (meta -> ,)
for (prep -> interfaces)
multimedia (compound -> products)
products (pobj -> for)
; (punct -> interfaces)
interior (amod -> environments)
environments (conj -> Develop)
; (punct -> environments)
the (det -> look)
look (conj -> environments)
of (prep -> look)
product (compound -> lines)
lines (pobj -> of)
; (punct -> look)
displays (conj -> look)
or (cc -> displays)
exhibits (conj -> displays)
. (punct -> Develop)
----------
Sentence: Set up and operate still or video cameras or photography equipment (including tripods, lenses, mounts, colour correction cards and batteries).
Set (ROOT -> Set)
up (prt -> Set)
and (cc -> Set)
operate (conj -> Set)
still (advmod -> operate)
or (cc -> still)
video (compound -> cameras)
cameras (conj -> still)
or (cc -> cameras)
photography (compound -> equipment)
equipment (conj -> cameras)
( (punct -> equipment)
including (prep -> equipment)
tripods (pobj -> including)
, (punct -> tripods)
lenses (conj -> tripods)
, (punct -> lenses)
mounts (conj -> lenses)
, (punct -> mounts)
colour (compound -> correction)
correction (compound -> cards)
cards (conj -> mounts)
and (cc -> cards)
batteries (conj -> cards)
) (punct -> Set)
. (punct -> Set)
----------
Sentence: Select, assemble, and position cameras, lenses, and relevant equipment, and test equipment prior to use.
Select (ROOT -> Select)
, (punct -> assemble)
assemble (conj -> Select)
, (punct -> assemble)
and (cc -> assemble)
position (compound -> cameras)
cameras (conj -> Select)
, (punct -> cameras)
lenses (conj -> cameras)
, (punct -> lenses)
and (cc -> lenses)
relevant (amod -> equipment)
equipment (conj -> lenses)
, (punct -> equipment)
and (cc -> equipment)
test (compound -> equipment)
equipment (conj -> equipment)
prior (advmod -> Select)
to (aux -> use)
use (xcomp -> Select)
. (punct -> Select)
----------
Sentence: Check that memory cards have adequate storage or film rolls have enough shots or footage space.
Check (ROOT -> Check)
that (det -> cards)
memory (compound -> cards)
cards (nsubj -> have)
have (ccomp -> Check)
adequate (amod -> storage)
storage (nsubj -> have)
or (cc -> storage)
film (compound -> rolls)
rolls (conj -> storage)
have (ccomp -> have)
enough (amod -> shots)
shots (dobj -> have)
or (cc -> shots)
footage (compound -> space)
space (conj -> shots)
. (punct -> Check)
----------
Sentence: Alter exposure, aperture, shutter speed and framing in order to achieve optimal results where images are clear, focused and adhere to project specifications or clients’ requirements.
Alter (compound -> exposure)
exposure (ROOT -> exposure)
, (punct -> exposure)
aperture (amod -> exposure)
, (punct -> aperture)
shutter (compound -> speed)
speed (conj -> aperture)
and (cc -> speed)
framing (conj -> speed)
in (prep -> exposure)
order (pobj -> in)
to (aux -> achieve)
achieve (acl -> order)
optimal (amod -> results)
results (dobj -> achieve)
where (advmod -> are)
images (nsubj -> are)
are (relcl -> results)
clear (advmod -> focused)
, (punct -> focused)
focused (acomp -> are)
and (cc -> focused)
adhere (conj -> focused)
to (aux -> project)
project (advcl -> adhere)
specifications (dobj -> project)
or (cc -> specifications)
clients (poss -> requirements)
’ (case -> clients)
requirements (conj -> specifications)
. (punct -> exposure)
----------
Sentence: Adjust the placement of props or subjects to ensure appropriate use of depth of field, lighting, and the environment.
Adjust (ROOT -> Adjust)
the (det -> placement)
placement (dobj -> Adjust)
of (prep -> placement)
props (pobj -> of)
or (cc -> props)
subjects (conj -> props)
to (aux -> ensure)
ensure (advcl -> Adjust)
appropriate (amod -> use)
use (dobj -> ensure)
of (prep -> use)
depth (pobj -> of)
of (prep -> depth)
field (pobj -> of)
, (punct -> use)
lighting (conj -> use)
, (punct -> lighting)
and (cc -> lighting)
the (det -> environment)
environment (conj -> lighting)
. (punct -> Adjust)
----------
Sentence: Determine the technical requirements in order to create successful artistic or audiovisual productions or projects.
Determine (ROOT -> Determine)
the (det -> requirements)
technical (amod -> requirements)
requirements (dobj -> Determine)
in (prep -> Determine)
order (pobj -> in)
to (aux -> create)
create (acl -> order)
successful (amod -> productions)
artistic (amod -> productions)
or (cc -> artistic)
audiovisual (conj -> artistic)
productions (dobj -> create)
or (cc -> productions)
projects (conj -> productions)
. (punct -> Determine)
----------
Sentence: Review scripts, storyboards, direction from producers, directors or leaders, and other information sources to ensure understanding of project and determine work requirements.
Review (compound -> scripts)
scripts (ROOT -> scripts)
, (punct -> scripts)
storyboards (nmod -> direction)
, (punct -> storyboards)
direction (appos -> scripts)
from (prep -> direction)
producers (pobj -> from)
, (punct -> producers)
directors (conj -> producers)
or (cc -> directors)
leaders (conj -> directors)
, (punct -> direction)
and (cc -> direction)
other (amod -> sources)
information (compound -> sources)
sources (conj -> direction)
to (aux -> ensure)
ensure (acl -> scripts)
understanding (dobj -> ensure)
of (prep -> understanding)
project (pobj -> of)
and (cc -> ensure)
determine (conj -> ensure)
work (compound -> requirements)
requirements (dobj -> determine)
. (punct -> scripts)
----------
Sentence: These may include the required materials, resources, equipment, tools, machinery, timeframes, dependencies, procedures, processes, sequences, or methods to deliver the required outcome.
These (nsubj -> include)
may (aux -> include)
include (ROOT -> include)
the (det -> materials)
required (amod -> materials)
materials (dobj -> include)
, (punct -> materials)
resources (conj -> materials)
, (punct -> resources)
equipment (conj -> resources)
, (punct -> equipment)
tools (conj -> equipment)
, (punct -> tools)
machinery (conj -> tools)
, (punct -> machinery)
timeframes (conj -> machinery)
, (punct -> timeframes)
dependencies (conj -> timeframes)
, (punct -> dependencies)
procedures (conj -> dependencies)
, (punct -> procedures)
processes (conj -> procedures)
, (punct -> processes)
sequences (conj -> processes)
, (punct -> sequences)
or (cc -> sequences)
methods (conj -> sequences)
to (aux -> deliver)
deliver (xcomp -> include)
the (det -> outcome)
required (amod -> outcome)
outcome (dobj -> deliver)
. (punct -> include)
----------
Sentence: Play chords, notes, melodies, or rhythms on musical instruments in order to perform for audiences, rehearsals and recordings or demonstrate how to play or use instruments for customers or students.
Play (ROOT -> Play)
chords (dobj -> Play)
, (punct -> chords)
notes (conj -> chords)
, (punct -> notes)
melodies (conj -> notes)
, (punct -> melodies)
or (cc -> melodies)
rhythms (conj -> melodies)
on (prep -> rhythms)
musical (amod -> instruments)
instruments (pobj -> on)
in (prep -> Play)
order (pobj -> in)
to (aux -> perform)
perform (acl -> order)
for (prep -> perform)
audiences (pobj -> for)
, (punct -> audiences)
rehearsals (conj -> audiences)
and (cc -> rehearsals)
recordings (conj -> rehearsals)
or (cc -> perform)
demonstrate (conj -> perform)
how (advmod -> play)
to (aux -> play)
play (xcomp -> demonstrate)
or (cc -> play)
use (conj -> play)
instruments (dobj -> use)
for (prep -> instruments)
customers (pobj -> for)
or (cc -> customers)
students (conj -> customers)
. (punct -> Play)
----------
Sentence: Account for the required projection, tone, and artistic direction of the project.
Account (ROOT -> Account)
for (prep -> Account)
the (det -> direction)
required (amod -> projection)
projection (nmod -> direction)
, (punct -> projection)
tone (conj -> projection)
, (punct -> tone)
and (cc -> tone)
artistic (amod -> direction)
direction (pobj -> for)
of (prep -> direction)
the (det -> project)
project (pobj -> of)
. (punct -> Account)
----------
Sentence: It may be appropriate to play in time to a metronome, read from sheet music or tabs, coordinate with other musicians, or provide prompts to musicians learning their parts.
It (nsubj -> be)
may (aux -> be)
be (ROOT -> be)
appropriate (acomp -> be)
to (aux -> play)
play (xcomp -> be)
in (prep -> play)
time (pobj -> in)
to (prep -> play)
a (det -> metronome)
metronome (pobj -> to)
, (punct -> be)
read (advcl -> be)
from (prep -> read)
sheet (compound -> music)
music (pobj -> from)
or (cc -> music)
tabs (conj -> music)
, (punct -> read)
coordinate (conj -> read)
with (prep -> coordinate)
other (amod -> musicians)
musicians (pobj -> with)
, (punct -> coordinate)
or (cc -> coordinate)
provide (conj -> coordinate)
prompts (dobj -> provide)
to (dative -> provide)
musicians (pobj -> to)
learning (acl -> musicians)
their (poss -> parts)
parts (dobj -> learning)
. (punct -> be)
----------
Sentence: Have discussions with customers, clients, or designers to determine or check the features, details, requirements, preferences, expectations, and other specifications of a product, good, or service.
Have (ROOT -> Have)
discussions (dobj -> Have)
with (prep -> discussions)
customers (pobj -> with)
, (punct -> customers)
clients (conj -> customers)
, (punct -> clients)
or (cc -> clients)
designers (conj -> clients)
to (aux -> determine)
determine (acl -> discussions)
or (cc -> determine)
check (conj -> determine)
the (det -> features)
features (dobj -> check)
, (punct -> features)
details (conj -> features)
, (punct -> details)
requirements (conj -> details)
, (punct -> requirements)
preferences (conj -> requirements)
, (punct -> preferences)
expectations (conj -> preferences)
, (punct -> expectations)
and (cc -> expectations)
other (amod -> specifications)
specifications (conj -> expectations)
of (prep -> specifications)
a (det -> product)
product (pobj -> of)
, (punct -> product)
good (conj -> product)
, (punct -> good)
or (cc -> good)
service (conj -> good)
. (punct -> Have)
----------
Sentence: This may involve negotiating to agree on details that are reasonable for applicable timeframes, budgets, standards, or safety requirements; presenting updates, design modifications or progress; explaining processes or procedures; and ensuring final output meets needs or requests.
This (nsubj -> involve)
may (aux -> involve)
involve (ccomp -> meets)
negotiating (xcomp -> involve)
to (aux -> agree)
agree (xcomp -> negotiating)
on (prep -> agree)
details (pobj -> on)
that (nsubj -> are)
are (relcl -> details)
reasonable (acomp -> are)
for (prep -> reasonable)
applicable (amod -> timeframes)
timeframes (pobj -> for)
, (punct -> timeframes)
budgets (conj -> timeframes)
, (punct -> budgets)
standards (conj -> budgets)
, (punct -> standards)
or (cc -> standards)
safety (compound -> requirements)
requirements (conj -> standards)
; (punct -> meets)
presenting (amod -> updates)
updates (nsubj -> meets)
, (punct -> updates)
design (compound -> modifications)
modifications (conj -> updates)
or (cc -> modifications)
progress (conj -> modifications)
; (punct -> updates)
explaining (csubj -> meets)
processes (dobj -> explaining)
or (cc -> processes)
procedures (conj -> processes)
; (punct -> explaining)
and (cc -> explaining)
ensuring (conj -> explaining)
final (amod -> output)
output (dobj -> ensuring)
meets (ROOT -> meets)
needs (dobj -> meets)
or (cc -> needs)
requests (conj -> needs)
. (punct -> meets)
----------
Sentence: Attach decorative or functional accessories or fittings (such as buttons, handles, hooks, or zippers) to products in order to enhance usability or functionality, support accessibility needs, or create an aesthetically pleasing effect.
Attach (ROOT -> Attach)
decorative (amod -> accessories)
or (cc -> decorative)
functional (conj -> decorative)
accessories (dobj -> Attach)
or (cc -> accessories)
fittings (conj -> accessories)
( (punct -> fittings)
such (amod -> as)
as (prep -> fittings)
buttons (pobj -> as)
, (punct -> buttons)
handles (conj -> buttons)
, (punct -> handles)
hooks (conj -> handles)
, (punct -> hooks)
or (cc -> hooks)
zippers (conj -> hooks)
) (punct -> accessories)
to (prep -> Attach)
products (pobj -> to)
in (prep -> Attach)
order (pobj -> in)
to (aux -> enhance)
enhance (acl -> order)
usability (dobj -> enhance)
or (cc -> usability)
functionality (conj -> usability)
, (punct -> Attach)
support (conj -> Attach)
accessibility (compound -> needs)
needs (dobj -> support)
, (punct -> support)
or (cc -> support)
create (conj -> support)
an (det -> effect)
aesthetically (advmod -> pleasing)
pleasing (amod -> effect)
effect (dobj -> create)
. (punct -> Attach)
----------
Sentence: This may involve reviewing designs, conferring with customers to review their needs or preferences, selecting appropriate accessories and methods for attachment (such as adhesives, fastenings, welding, or stitching), and checking products to ensure functionality and quality.
This (nsubj -> involve)
may (aux -> involve)
involve (ROOT -> involve)
reviewing (amod -> designs)
designs (dobj -> involve)
, (punct -> involve)
conferring (advcl -> involve)
with (prep -> conferring)
customers (pobj -> with)
to (aux -> review)
review (advcl -> conferring)
their (poss -> needs)
needs (dobj -> review)
or (cc -> needs)
preferences (conj -> needs)
, (punct -> review)
selecting (conj -> conferring)
appropriate (amod -> accessories)
accessories (dobj -> selecting)
and (cc -> accessories)
methods (conj -> accessories)
for (prep -> accessories)
attachment (pobj -> for)
( (punct -> accessories)
such (amod -> as)
as (prep -> accessories)
adhesives (pobj -> as)
, (punct -> adhesives)
fastenings (conj -> adhesives)
, (punct -> fastenings)
welding (conj -> fastenings)
, (punct -> welding)
or (cc -> welding)
stitching (conj -> welding)
) (punct -> accessories)
, (punct -> involve)
and (cc -> involve)
checking (conj -> involve)
products (dobj -> checking)
to (aux -> ensure)
ensure (advcl -> checking)
functionality (dobj -> ensure)
and (cc -> functionality)
quality (conj -> functionality)
. (punct -> involve)
----------
Sentence: Estimate the costs of goods, services, or materials by considering factors such as labour, production, transportation, or procurement expenses.
Estimate (ROOT -> Estimate)
the (det -> costs)
costs (dobj -> Estimate)
of (prep -> costs)
goods (pobj -> of)
, (punct -> goods)
services (conj -> goods)
, (punct -> services)
or (cc -> services)
materials (conj -> services)
by (prep -> Estimate)
considering (pcomp -> by)
factors (dobj -> considering)
such (amod -> as)
as (prep -> factors)
labour (pobj -> as)
, (punct -> labour)
production (conj -> labour)
, (punct -> production)
transportation (conj -> production)
, (punct -> transportation)
or (cc -> transportation)
procurement (compound -> expenses)
expenses (conj -> transportation)
. (punct -> Estimate)
----------
Sentence: Utilise cost estimation techniques, undertake research, and consider recent trends and historical data to develop accurate and realistic cost projections.
Utilise (compound -> techniques)
cost (compound -> techniques)
estimation (compound -> techniques)
techniques (nsubj -> undertake)
, (punct -> techniques)
undertake (ROOT -> undertake)
research (dobj -> undertake)
, (punct -> undertake)
and (cc -> undertake)
consider (conj -> undertake)
recent (amod -> trends)
trends (dobj -> consider)
and (cc -> trends)
historical (amod -> data)
data (conj -> trends)
to (aux -> develop)
develop (advcl -> consider)
accurate (amod -> projections)
and (cc -> accurate)
realistic (conj -> accurate)
cost (compound -> projections)
projections (dobj -> develop)
. (punct -> undertake)
----------
Sentence: Return functionality, structural integrity or desired appearance to textiles or apparel by mending damage (including sewing, patching, refinishing leather) or replacing components (such as zips, buttons, or heels).
Return (ROOT -> Return)
functionality (dobj -> Return)
, (punct -> functionality)
structural (amod -> integrity)
integrity (conj -> functionality)
or (cc -> integrity)
desired (amod -> appearance)
appearance (conj -> integrity)
to (prep -> appearance)
textiles (pobj -> to)
or (cc -> textiles)
apparel (conj -> textiles)
by (prep -> Return)
mending (pcomp -> by)
damage (dobj -> mending)
( (punct -> damage)
including (prep -> damage)
sewing (pobj -> including)
, (punct -> sewing)
patching (conj -> sewing)
, (punct -> patching)
refinishing (amod -> leather)
leather (appos -> sewing)
) (punct -> sewing)
or (cc -> sewing)
replacing (conj -> sewing)
components (dobj -> replacing)
( (punct -> components)
such (amod -> as)
as (prep -> components)
zips (pobj -> as)
, (punct -> zips)
buttons (conj -> zips)
, (punct -> buttons)
or (cc -> buttons)
heels (conj -> buttons)
) (punct -> Return)
. (punct -> Return)
----------
Sentence: Exchange relevant knowledge, ideas, or insights with colleagues in order to support collaboration, effective work processes, informed decision-making and ongoing learning.
Exchange (advcl -> processes)
relevant (amod -> knowledge)
knowledge (dobj -> Exchange)
, (punct -> knowledge)
ideas (conj -> knowledge)
, (punct -> ideas)
or (cc -> ideas)
insights (conj -> ideas)
with (prep -> insights)
colleagues (pobj -> with)
in (prep -> Exchange)
order (pobj -> in)
to (aux -> support)
support (acl -> order)
collaboration (dobj -> support)
, (punct -> processes)
effective (amod -> processes)
work (compound -> processes)
processes (ROOT -> processes)
, (punct -> processes)
informed (amod -> making)
decision (compound -> making)
- (punct -> making)
making (conj -> processes)
and (cc -> making)
ongoing (amod -> learning)
learning (conj -> making)
. (punct -> processes)
----------
Sentence: Identify relevant information or staff members and communicate clearly or listen actively in order to ensure the effective exchange of information.
Identify (ROOT -> Identify)
relevant (amod -> information)
information (dobj -> Identify)
or (cc -> information)
staff (compound -> members)
members (conj -> information)
and (cc -> Identify)
communicate (conj -> Identify)
clearly (advmod -> communicate)
or (cc -> communicate)
listen (conj -> communicate)
actively (advmod -> listen)
in (prep -> listen)
order (pobj -> in)
to (aux -> ensure)
ensure (acl -> order)
the (det -> exchange)
effective (amod -> exchange)
exchange (dobj -> ensure)
of (prep -> exchange)
information (pobj -> of)
. (punct -> Identify)
----------
Sentence: This may involve facilitating co-design, coordinating timelines or rosters, giving, or receiving technical expertise and guidance, receiving or providing details or updates about a product or service, reviewing work activities or performance, or otherwise having discussions with colleagues about relevant information.
This (nsubj -> involve)
may (aux -> involve)
involve (ROOT -> involve)
facilitating (xcomp -> involve)
co (dobj -> facilitating)
- (dobj -> facilitating)
design (dobj -> facilitating)
, (punct -> facilitating)
coordinating (conj -> facilitating)
timelines (dobj -> coordinating)
or (cc -> timelines)
rosters (conj -> timelines)
, (punct -> coordinating)
giving (conj -> coordinating)
, (punct -> giving)
or (cc -> giving)
receiving (conj -> giving)
technical (amod -> expertise)
expertise (dobj -> receiving)
and (cc -> expertise)
guidance (conj -> expertise)
, (punct -> receiving)
receiving (conj -> receiving)
or (cc -> receiving)
providing (conj -> receiving)
details (dobj -> providing)
or (cc -> details)
updates (conj -> details)
about (prep -> details)
a (det -> product)
product (pobj -> about)
or (cc -> product)
service (conj -> product)
, (punct -> receiving)
reviewing (conj -> receiving)
work (compound -> activities)
activities (dobj -> reviewing)
or (cc -> activities)
performance (conj -> activities)
, (punct -> reviewing)
or (cc -> reviewing)
otherwise (advmod -> having)
having (conj -> reviewing)
discussions (dobj -> having)
with (prep -> discussions)
colleagues (pobj -> with)
about (prep -> colleagues)
relevant (amod -> information)
information (pobj -> about)
. (punct -> involve)
----------
Sentence: Cut and trim fabrics, textiles, leather or hide, using appropriate techniques and tools based on material requirements.
Cut (ROOT -> Cut)
and (cc -> Cut)
trim (conj -> Cut)
fabrics (dobj -> trim)
, (punct -> fabrics)
textiles (conj -> fabrics)
, (punct -> textiles)
leather (conj -> textiles)
or (cc -> leather)
hide (conj -> leather)
, (punct -> Cut)
using (advcl -> Cut)
appropriate (amod -> techniques)
techniques (dobj -> using)
and (cc -> techniques)
tools (conj -> techniques)
based (acl -> techniques)
on (prep -> based)
material (compound -> requirements)
requirements (pobj -> on)
. (punct -> Cut)
----------
Sentence: Account for factors such as material type, thickness, flexibility, or texture.
Account (ROOT -> Account)
for (prep -> Account)
factors (pobj -> for)
such (amod -> as)
as (prep -> factors)
material (compound -> type)
type (pobj -> as)
, (punct -> type)
thickness (conj -> type)
, (punct -> thickness)
flexibility (conj -> thickness)
, (punct -> flexibility)
or (cc -> flexibility)
texture (conj -> flexibility)
. (punct -> Account)
----------
Sentence: Follow measurements, patterns, or templates to ensure materials have the correct dimensions for use, installation, or further processing, meet quality requirements and to fix any deficiencies.
Follow (amod -> measurements)
measurements (nsubj -> have)
, (punct -> measurements)
patterns (conj -> measurements)
, (punct -> patterns)
or (cc -> patterns)
templates (conj -> patterns)
to (aux -> ensure)
ensure (relcl -> measurements)
materials (dobj -> ensure)
have (ROOT -> have)
the (det -> dimensions)
correct (amod -> dimensions)
dimensions (dobj -> have)
for (prep -> dimensions)
use (pobj -> for)
, (punct -> use)
installation (conj -> use)
, (punct -> installation)
or (cc -> installation)
further (amod -> processing)
processing (conj -> installation)
, (punct -> have)
meet (conj -> have)
quality (compound -> requirements)
requirements (dobj -> meet)
and (cc -> meet)
to (aux -> fix)
fix (conj -> meet)
any (det -> deficiencies)
deficiencies (dobj -> fix)
. (punct -> have)
----------
Sentence: Use appropriate measuring tools to mark reference points, cutting lines, or other indicators on materials for the purpose of adhering to blueprints or designs, recording measurements, or establishing accurate positioning or alignment of components during fabrication, construction or assembly.
Use (ROOT -> Use)
appropriate (amod -> tools)
measuring (compound -> tools)
tools (dobj -> Use)
to (aux -> mark)
mark (xcomp -> Use)
reference (compound -> points)
points (dobj -> mark)
, (punct -> mark)
cutting (advcl -> mark)
lines (dobj -> cutting)
, (punct -> lines)
or (cc -> lines)
other (amod -> indicators)
indicators (conj -> lines)
on (prep -> indicators)
materials (pobj -> on)
for (prep -> materials)
the (det -> purpose)
purpose (pobj -> for)
of (prep -> purpose)
adhering (pcomp -> of)
to (prep -> adhering)
blueprints (pobj -> to)
or (cc -> blueprints)
designs (conj -> blueprints)
, (punct -> blueprints)
recording (amod -> measurements)
measurements (appos -> blueprints)
, (punct -> cutting)
or (cc -> cutting)
establishing (conj -> cutting)
accurate (amod -> positioning)
positioning (dobj -> establishing)
or (cc -> positioning)
alignment (conj -> positioning)
of (prep -> alignment)
components (pobj -> of)
during (prep -> components)
fabrication (pobj -> during)
, (punct -> fabrication)
construction (conj -> fabrication)
or (cc -> construction)
assembly (conj -> construction)
. (punct -> Use)
----------
Sentence: Record operational or production data, identifying and capturing relevant information accurately and systemically, to enable the monitoring, control, or improvement of processes or to meet reporting and record keeping requirements.
Record (nmod -> data)
operational (amod -> data)
or (cc -> operational)
production (conj -> operational)
data (ROOT -> data)
, (punct -> data)
identifying (acl -> data)
and (cc -> identifying)
capturing (conj -> identifying)
relevant (amod -> information)
information (dobj -> capturing)
accurately (advmod -> capturing)
and (cc -> accurately)
systemically (conj -> accurately)
, (punct -> data)
to (aux -> enable)
enable (relcl -> data)
the (det -> monitoring)
monitoring (dobj -> enable)
, (punct -> monitoring)
control (conj -> monitoring)
, (punct -> control)
or (cc -> control)
improvement (conj -> control)
of (prep -> improvement)
processes (pobj -> of)
or (cc -> enable)
to (aux -> meet)
meet (conj -> enable)
reporting (dobj -> meet)
and (cc -> reporting)
record (compound -> keeping)
keeping (compound -> requirements)
requirements (conj -> reporting)
. (punct -> data)
----------
Sentence: This may include the use of industry-specific technical equipment or software.
This (nsubj -> include)
may (aux -> include)
include (ROOT -> include)
the (det -> use)
use (dobj -> include)
of (prep -> use)
industry (npadvmod -> specific)
- (punct -> specific)
specific (amod -> equipment)
technical (amod -> equipment)
equipment (pobj -> of)
or (cc -> equipment)
software (conj -> equipment)
. (punct -> include)
----------
Sentence: Position and secure materials or work pieces onto production equipment in order to facilitate, processing, manufacturing, or production.
Position (nmod -> pieces)
and (cc -> Position)
secure (conj -> Position)
materials (conj -> Position)
or (cc -> materials)
work (conj -> Position)
pieces (ROOT -> pieces)
onto (prep -> pieces)
production (compound -> equipment)
equipment (pobj -> onto)
in (prep -> pieces)
order (pobj -> in)
to (aux -> facilitate)
facilitate (acl -> order)
, (punct -> facilitate)
processing (conj -> facilitate)
, (punct -> processing)
manufacturing (conj -> processing)
, (punct -> manufacturing)
or (cc -> manufacturing)
production (conj -> manufacturing)
. (punct -> pieces)
----------
Sentence: Follow work instructions, manufacturing specifications and work health and safety guidelines in order to determine safe methods, correct positioning, and suitable securement methods including appropriate fixtures, clamps, or adhesives.
Follow (ROOT -> Follow)
work (compound -> instructions)
instructions (dobj -> Follow)
, (punct -> instructions)
manufacturing (advcl -> Follow)
specifications (dobj -> manufacturing)
and (cc -> manufacturing)
work (conj -> manufacturing)
health (nmod -> guidelines)
and (cc -> health)
safety (conj -> health)
guidelines (dobj -> work)
in (prep -> manufacturing)
order (pobj -> in)
to (aux -> determine)
determine (acl -> order)
safe (amod -> methods)
methods (dobj -> determine)
, (punct -> methods)
correct (amod -> positioning)
positioning (conj -> methods)
, (punct -> positioning)
and (cc -> positioning)
suitable (amod -> methods)
securement (amod -> methods)
methods (conj -> positioning)
including (prep -> methods)
appropriate (amod -> fixtures)
fixtures (pobj -> including)
, (punct -> fixtures)
clamps (conj -> fixtures)
, (punct -> clamps)
or (cc -> clamps)
adhesives (conj -> clamps)
. (punct -> Follow)
----------
Sentence: Inspect work and make adjustments in order to ensure safe and proper alignment.
Inspect (ROOT -> Inspect)
work (dobj -> Inspect)
and (cc -> Inspect)
make (conj -> Inspect)
adjustments (dobj -> make)
in (prep -> make)
order (pobj -> in)
to (aux -> ensure)
ensure (acl -> order)
safe (amod -> alignment)
and (cc -> safe)
proper (conj -> safe)
alignment (dobj -> ensure)
. (punct -> Inspect)
----------
Sentence: Design and fabricate work aids, for example, patterns, templates, fixtures, or jigs, to support the production process and ensure accuracy and quality of work.
Design (ROOT -> Design)
and (cc -> Design)
fabricate (conj -> Design)
work (compound -> aids)
aids (dobj -> fabricate)
, (punct -> Design)
for (prep -> patterns)
example (pobj -> for)
, (punct -> patterns)
patterns (conj -> Design)
, (punct -> patterns)
templates (conj -> patterns)
, (punct -> templates)
fixtures (conj -> templates)
, (punct -> fixtures)
or (cc -> fixtures)
jigs (conj -> fixtures)
, (punct -> Design)
to (aux -> support)
support (advcl -> Design)
the (det -> process)
production (compound -> process)
process (dobj -> support)
and (cc -> support)
ensure (conj -> support)
accuracy (dobj -> ensure)
and (cc -> accuracy)
quality (conj -> accuracy)
of (prep -> accuracy)
work (pobj -> of)
. (punct -> Design)
----------
Sentence: Interpret requirements and specifications for work pieces in order to determine the required measurements, patterns, tools, equipment, materials, and instructions.
Interpret (ROOT -> Interpret)
requirements (dobj -> Interpret)
and (cc -> requirements)
specifications (conj -> requirements)
for (prep -> requirements)
work (compound -> pieces)
pieces (pobj -> for)
in (prep -> Interpret)
order (pobj -> in)
to (aux -> determine)
determine (acl -> order)
the (det -> measurements)
required (amod -> measurements)
measurements (dobj -> determine)
, (punct -> measurements)
patterns (conj -> measurements)
, (punct -> patterns)
tools (conj -> patterns)
, (punct -> tools)
equipment (conj -> tools)
, (punct -> equipment)
materials (conj -> equipment)
, (punct -> materials)
and (cc -> materials)
instructions (conj -> materials)
. (punct -> Interpret)
----------
Sentence: This may include creating written or visual instructions to support task performance and ensure consistent outcomes.
This (nsubj -> include)
may (aux -> include)
include (ROOT -> include)
creating (xcomp -> include)
written (amod -> instructions)
or (cc -> written)
visual (conj -> written)
instructions (dobj -> creating)
to (aux -> support)
support (advcl -> creating)
task (compound -> performance)
performance (dobj -> support)
and (cc -> support)
ensure (conj -> support)
consistent (amod -> outcomes)
outcomes (dobj -> ensure)
. (punct -> include)
----------
Sentence: Review work aids against specifications, making alterations as necessary.
Review (nsubj -> work)
work (ROOT -> work)
aids (dobj -> work)
against (prep -> aids)
specifications (pobj -> against)
, (punct -> work)
making (advcl -> work)
alterations (dobj -> making)
as (prep -> making)
necessary (amod -> as)
. (punct -> work)
----------
Sentence: Join textiles or other materials together through the application of adhesives.
Join (ROOT -> Join)
textiles (dobj -> Join)
or (cc -> textiles)
other (amod -> materials)
materials (conj -> textiles)
together (advmod -> Join)
through (prep -> Join)
the (det -> application)
application (pobj -> through)
of (prep -> application)
adhesives (pobj -> of)
. (punct -> Join)
----------
Sentence: Review job specifications in order to determine factors such as appropriate materials, adhesives, tools, equipment, and techniques.
Review (ROOT -> Review)
job (compound -> specifications)
specifications (dobj -> Review)
in (prep -> Review)
order (pobj -> in)
to (aux -> determine)
determine (acl -> order)
factors (dobj -> determine)
such (amod -> as)
as (prep -> factors)
appropriate (amod -> materials)
materials (pobj -> as)
, (punct -> materials)
adhesives (conj -> materials)
, (punct -> adhesives)
tools (conj -> adhesives)
, (punct -> tools)
equipment (conj -> tools)
, (punct -> equipment)
and (cc -> equipment)
techniques (conj -> equipment)
. (punct -> Review)
----------
Sentence: Follow manufacturer specifications to ensure proper application, curing, drying, and finishing techniques.
Follow (ROOT -> Follow)
manufacturer (compound -> specifications)
specifications (dobj -> Follow)
to (aux -> ensure)
ensure (advcl -> Follow)
proper (amod -> application)
application (dobj -> ensure)
, (punct -> Follow)
curing (advcl -> Follow)
, (punct -> curing)
drying (conj -> curing)
, (punct -> drying)
and (cc -> drying)
finishing (conj -> drying)
techniques (dobj -> finishing)
. (punct -> Follow)
----------
Sentence: Follow work health and safety procedures and inspect and test joins to ensure integrity and effectiveness of bond.
Follow (ROOT -> Follow)
work (nmod -> procedures)
health (nmod -> procedures)
and (cc -> health)
safety (conj -> health)
procedures (dobj -> Follow)
and (cc -> Follow)
inspect (conj -> Follow)
and (cc -> inspect)
test (conj -> inspect)
joins (conj -> Follow)
to (aux -> ensure)
ensure (advcl -> joins)
integrity (dobj -> ensure)
and (cc -> integrity)
effectiveness (conj -> integrity)
of (prep -> integrity)
bond (pobj -> of)
. (punct -> Follow)
----------
Sentence: Prepare fabrics, leathers or other materials for processing or production by cutting, folding, organising, or undertaking processes such as washing, soaking, drying, steaming, ironing, polishing, or buffing.
Prepare (ROOT -> Prepare)
fabrics (dobj -> Prepare)
, (punct -> fabrics)
leathers (conj -> fabrics)
or (cc -> leathers)
other (amod -> materials)
materials (conj -> leathers)
for (prep -> materials)
processing (pobj -> for)
or (cc -> processing)
production (conj -> processing)
by (prep -> Prepare)
cutting (pobj -> by)
, (punct -> cutting)
folding (conj -> cutting)
, (punct -> folding)
organising (conj -> folding)
, (punct -> organising)
or (cc -> organising)
undertaking (compound -> processes)
processes (dobj -> Prepare)
such (amod -> as)
as (prep -> processes)
washing (pobj -> as)
, (punct -> washing)
soaking (conj -> washing)
, (punct -> soaking)
drying (conj -> soaking)
, (punct -> drying)
steaming (conj -> drying)
, (punct -> steaming)
ironing (conj -> steaming)
, (punct -> ironing)
polishing (conj -> ironing)
, (punct -> polishing)
or (cc -> polishing)
buffing (conj -> polishing)
. (punct -> Prepare)
----------
Sentence: This may be done in order to ensure materials meet required production specifications such as size or shape, or to pre-shrink fabrics, clean materials, ensure colours do not run in the wash after production, and to soften and smooth materials.
This (nsubjpass -> done)
may (aux -> done)
be (auxpass -> done)
done (ROOT -> done)
in (prep -> done)
order (pobj -> in)
to (aux -> ensure)
ensure (acl -> order)
materials (nsubj -> meet)
meet (ccomp -> ensure)
required (amod -> specifications)
production (compound -> specifications)
specifications (dobj -> meet)
such (amod -> as)
as (prep -> specifications)
size (pobj -> as)
or (cc -> size)
shape (conj -> size)
, (punct -> size)
or (cc -> size)
to (conj -> size)
pre (amod -> fabrics)
- (amod -> fabrics)
shrink (amod -> fabrics)
fabrics (pobj -> to)
, (punct -> fabrics)
clean (amod -> materials)
materials (conj -> size)
, (punct -> done)
ensure (conj -> done)
colours (nsubj -> run)
do (aux -> run)
not (neg -> run)
run (ccomp -> ensure)
in (prep -> run)
the (det -> wash)
wash (pobj -> in)
after (prep -> run)
production (pobj -> after)
, (punct -> run)
and (cc -> run)
to (aux -> soften)
soften (conj -> run)
and (cc -> soften)
smooth (amod -> materials)
materials (conj -> soften)
. (punct -> done)
----------
Sentence: Read, interpret, and understand work documentation such as reports, designs, blueprints, specifications, work orders, technical information, or other instructions to determine work requirements.
Read (ROOT -> Read)
, (punct -> Read)
interpret (conj -> Read)
, (punct -> interpret)
and (cc -> interpret)
understand (conj -> interpret)
work (compound -> documentation)
documentation (dobj -> understand)
such (amod -> as)
as (prep -> documentation)
reports (pobj -> as)
, (punct -> reports)
designs (conj -> reports)
, (punct -> designs)
blueprints (conj -> designs)
, (punct -> blueprints)
specifications (conj -> blueprints)
, (punct -> specifications)
work (compound -> orders)
orders (conj -> specifications)
, (punct -> orders)
technical (amod -> information)
information (conj -> orders)
, (punct -> information)
or (cc -> information)
other (amod -> instructions)
instructions (conj -> information)
to (aux -> determine)
determine (acl -> instructions)
work (compound -> requirements)
requirements (dobj -> determine)
. (punct -> Read)
----------
Sentence: These may include the required materials, resources, equipment, tools, machinery, timeframes, dependencies, procedures, processes, sequences, or methods to deliver the required outcome.
These (nsubj -> include)
may (aux -> include)
include (ROOT -> include)
the (det -> materials)
required (amod -> materials)
materials (dobj -> include)
, (punct -> materials)
resources (conj -> materials)
, (punct -> resources)
equipment (conj -> resources)
, (punct -> equipment)
tools (conj -> equipment)
, (punct -> tools)
machinery (conj -> tools)
, (punct -> machinery)
timeframes (conj -> machinery)
, (punct -> timeframes)
dependencies (conj -> timeframes)
, (punct -> dependencies)
procedures (conj -> dependencies)
, (punct -> procedures)
processes (conj -> procedures)
, (punct -> processes)
sequences (conj -> processes)
, (punct -> sequences)
or (cc -> sequences)
methods (conj -> sequences)
to (aux -> deliver)
deliver (xcomp -> include)
the (det -> outcome)
required (amod -> outcome)
outcome (dobj -> deliver)
. (punct -> include)
----------
Sentence: Align or position parts or work pieces to ensure proper fit, clearance, or interconnection of components and that assembly is completed accurately in accordance with job requirements or design specifications.
Align (nmod -> parts)
or (cc -> Align)
position (conj -> Align)
parts (nsubjpass -> completed)
or (cc -> parts)
work (conj -> parts)
pieces (dobj -> work)
to (aux -> ensure)
ensure (relcl -> parts)
proper (amod -> fit)
fit (dobj -> ensure)
, (punct -> fit)
clearance (conj -> fit)
, (punct -> clearance)
or (cc -> clearance)
interconnection (conj -> clearance)
of (prep -> interconnection)
components (pobj -> of)
and (cc -> parts)
that (det -> assembly)
assembly (nsubjpass -> completed)
is (auxpass -> completed)
completed (ROOT -> completed)
accurately (advmod -> completed)
in (prep -> completed)
accordance (pobj -> in)
with (prep -> accordance)
job (compound -> requirements)
requirements (pobj -> with)
or (cc -> requirements)
design (compound -> specifications)
specifications (conj -> requirements)
. (punct -> completed)
----------
Sentence: Identify appropriate alignment, spacing, positions or measurements by reviewing job specifications or other work instructions.
Identify (ROOT -> Identify)
appropriate (amod -> alignment)
alignment (dobj -> Identify)
, (punct -> alignment)
spacing (conj -> alignment)
, (punct -> spacing)
positions (conj -> spacing)
or (cc -> positions)
measurements (conj -> positions)
by (prep -> positions)
reviewing (pcomp -> by)
job (compound -> specifications)
specifications (dobj -> reviewing)
or (cc -> specifications)
other (amod -> instructions)
work (compound -> instructions)
instructions (conj -> specifications)
. (punct -> Identify)
----------
Sentence: This may include the use of tools or guides such as templates, jigs, and measurement techniques to ensure accurate alignment.
This (nsubj -> include)
may (aux -> include)
include (ROOT -> include)
the (det -> use)
use (dobj -> include)
of (prep -> use)
tools (pobj -> of)
or (cc -> tools)
guides (conj -> tools)
such (amod -> as)
as (prep -> guides)
templates (pobj -> as)
, (punct -> templates)
jigs (conj -> templates)
, (punct -> jigs)
and (cc -> jigs)
measurement (compound -> techniques)
techniques (conj -> jigs)
to (aux -> ensure)
ensure (relcl -> use)
accurate (amod -> alignment)
alignment (dobj -> ensure)
. (punct -> include)
----------
Sentence: Check alignment against job requirements, making adjustments as necessary.
Check (ROOT -> Check)
alignment (dobj -> Check)
against (prep -> alignment)
job (compound -> requirements)
requirements (pobj -> against)
, (punct -> Check)
making (advcl -> Check)
adjustments (dobj -> making)
as (prep -> making)
necessary (amod -> as)
. (punct -> Check)
----------
Sentence: Adjust the alignment, positioning, or tension of fabrics or other materials during garment production or processing in order to facilitate the production process, ensure accurate cutting or stitching, or achieve desired effect.
Adjust (ROOT -> Adjust)
the (det -> alignment)
alignment (dobj -> Adjust)
, (punct -> alignment)
positioning (conj -> alignment)
, (punct -> positioning)
or (cc -> positioning)
tension (conj -> positioning)
of (prep -> tension)
fabrics (pobj -> of)
or (cc -> fabrics)
other (amod -> materials)
materials (conj -> fabrics)
during (prep -> Adjust)
garment (compound -> production)
production (pobj -> during)
or (cc -> production)
processing (conj -> production)
in (prep -> Adjust)
order (pobj -> in)
to (aux -> facilitate)
facilitate (acl -> order)
the (det -> process)
production (compound -> process)
process (dobj -> facilitate)
, (punct -> Adjust)
ensure (conj -> Adjust)
accurate (amod -> cutting)
cutting (dobj -> ensure)
or (cc -> cutting)
stitching (conj -> cutting)
, (punct -> ensure)
or (cc -> ensure)
achieve (conj -> ensure)
desired (amod -> effect)
effect (dobj -> achieve)
. (punct -> Adjust)
----------
Sentence: This may involve techniques such as stretching, folding, bending, shaping, straightening, or smoothing.
This (nsubj -> involve)
may (aux -> involve)
involve (ROOT -> involve)
techniques (dobj -> involve)
such (amod -> as)
as (prep -> techniques)
stretching (pobj -> as)
, (punct -> stretching)
folding (conj -> stretching)
, (punct -> folding)
bending (conj -> folding)
, (punct -> bending)
shaping (conj -> bending)
, (punct -> shaping)
straightening (conj -> shaping)
, (punct -> straightening)
or (cc -> straightening)
smoothing (conj -> straightening)
. (punct -> involve)
----------
Sentence: Inspect and further adjust materials as necessary to ensure the final garment meets job requirements.
Inspect (ROOT -> Inspect)
and (cc -> Inspect)
further (advmod -> adjust)
adjust (conj -> Inspect)
materials (dobj -> adjust)
as (prep -> adjust)
necessary (amod -> as)
to (aux -> ensure)
ensure (advcl -> adjust)
the (det -> garment)
final (amod -> garment)
garment (nsubj -> meets)
meets (ccomp -> ensure)
job (compound -> requirements)
requirements (dobj -> meets)
. (punct -> Inspect)
----------
Sentence: Layout and mark guidelines on materials or workpieces to ensure accurate and consistent placements, measurements, and cuts.
Layout (ROOT -> Layout)
and (cc -> Layout)
mark (conj -> Layout)
guidelines (dobj -> mark)
on (prep -> guidelines)
materials (pobj -> on)
or (cc -> materials)
workpieces (conj -> materials)
to (aux -> ensure)
ensure (advcl -> mark)
accurate (amod -> placements)
and (cc -> accurate)
consistent (conj -> accurate)
placements (dobj -> ensure)
, (punct -> placements)
measurements (conj -> placements)
, (punct -> measurements)
and (cc -> measurements)
cuts (conj -> measurements)
. (punct -> Layout)
----------
Sentence: Utilise patterns, templates, blueprints, or sketches to ensure work is produced in accordance with job specifications.
Utilise (compound -> patterns)
patterns (ROOT -> patterns)
, (punct -> patterns)
templates (conj -> patterns)
, (punct -> templates)
blueprints (conj -> templates)
, (punct -> blueprints)
or (cc -> blueprints)
sketches (conj -> blueprints)
to (aux -> ensure)
ensure (xcomp -> sketches)
work (nsubjpass -> produced)
is (auxpass -> produced)
produced (ccomp -> ensure)
in (prep -> produced)
accordance (pobj -> in)
with (prep -> accordance)
job (compound -> specifications)
specifications (pobj -> with)
. (punct -> patterns)
----------
Sentence: Join fabric or materials together using needles, threads and hand techniques or sewing machines.
Join (ROOT -> Join)
fabric (dobj -> Join)
or (cc -> fabric)
materials (conj -> fabric)
together (advmod -> using)
using (advcl -> Join)
needles (dobj -> using)
, (punct -> needles)
threads (conj -> needles)
and (cc -> threads)
hand (compound -> techniques)
techniques (conj -> threads)
or (cc -> techniques)
sewing (conj -> techniques)
machines (appos -> needles)
. (punct -> Join)
----------
Sentence: Follow patterns, instructions, design or work specifications, maintain appropriate tension, and select appropriate tools according to material requirements.
Follow (ROOT -> Follow)
patterns (dobj -> Follow)
, (punct -> patterns)
instructions (conj -> patterns)
, (punct -> instructions)
design (nmod -> specifications)
or (cc -> design)
work (conj -> design)
specifications (conj -> instructions)
, (punct -> Follow)
maintain (conj -> Follow)
appropriate (amod -> tension)
tension (dobj -> maintain)
, (punct -> maintain)
and (cc -> maintain)
select (conj -> maintain)
appropriate (amod -> tools)
tools (dobj -> select)
according (prep -> select)
to (prep -> according)
material (compound -> requirements)
requirements (pobj -> to)
. (punct -> Follow)
----------
Sentence: Form specific shapes, patterns, textures or other decorative designs on the surfaces or edges of wooden objects or structures.
Form (ROOT -> Form)
specific (amod -> shapes)
shapes (dobj -> Form)
, (punct -> shapes)
patterns (conj -> shapes)
, (punct -> patterns)
textures (conj -> patterns)
or (cc -> textures)
other (amod -> designs)
decorative (amod -> designs)
designs (conj -> textures)
on (prep -> shapes)
the (det -> surfaces)
surfaces (pobj -> on)
or (cc -> surfaces)
edges (conj -> surfaces)
of (prep -> surfaces)
wooden (amod -> objects)
objects (pobj -> of)
or (cc -> objects)
structures (conj -> objects)
. (punct -> Form)
----------
Sentence: This may involve the use of hand or power tools and equipment, understanding the properties of various wood types and selecting tools or techniques accordingly, positioning and securing shaping equipment, checking blades or abrasive attachments for sharpness and alignment, replacing components when they are no longer safe for use, and reapplying finishes or paint on wooden surfaces.
This (nsubj -> involve)
may (aux -> involve)
involve (ROOT -> involve)
the (det -> use)
use (dobj -> involve)
of (prep -> use)
hand (nmod -> tools)
or (cc -> hand)
power (conj -> hand)
tools (pobj -> of)
and (cc -> tools)
equipment (conj -> tools)
, (punct -> involve)
understanding (advcl -> involve)
the (det -> properties)
properties (dobj -> understanding)
of (prep -> properties)
various (amod -> types)
wood (compound -> types)
types (pobj -> of)
and (cc -> types)
selecting (conj -> understanding)
tools (dobj -> selecting)
or (cc -> tools)
techniques (conj -> tools)
accordingly (advmod -> selecting)
, (punct -> selecting)
positioning (conj -> selecting)
and (cc -> positioning)
securing (conj -> positioning)
shaping (compound -> equipment)
equipment (dobj -> securing)
, (punct -> positioning)
checking (compound -> blades)
blades (conj -> positioning)
or (cc -> blades)
abrasive (amod -> attachments)
attachments (conj -> blades)
for (prep -> positioning)
sharpness (pobj -> for)
and (cc -> sharpness)
alignment (conj -> sharpness)
, (punct -> selecting)
replacing (conj -> selecting)
components (dobj -> replacing)
when (advmod -> are)
they (nsubj -> are)
are (advcl -> replacing)
no (neg -> longer)
longer (advmod -> are)
safe (acomp -> are)
for (prep -> safe)
use (pobj -> for)
, (punct -> replacing)
and (cc -> replacing)
reapplying (conj -> replacing)
finishes (dobj -> reapplying)
or (cc -> finishes)
paint (conj -> finishes)
on (prep -> paint)
wooden (amod -> surfaces)
surfaces (pobj -> on)
. (punct -> involve)
----------
Sentence: Monitor the jig or shape and adjust to ensure desired outcomes, and review designs or plans to confirm outcomes align with job requirements.
Monitor (ROOT -> Monitor)
the (det -> jig)
jig (dobj -> Monitor)
or (cc -> jig)
shape (conj -> jig)
and (cc -> Monitor)
adjust (conj -> Monitor)
to (aux -> ensure)
ensure (advcl -> adjust)
desired (amod -> outcomes)
outcomes (dobj -> ensure)
, (punct -> Monitor)
and (cc -> Monitor)
review (conj -> Monitor)
designs (dobj -> review)
or (cc -> designs)
plans (conj -> designs)
to (aux -> confirm)
confirm (xcomp -> plans)
outcomes (dobj -> confirm)
align (ccomp -> confirm)
with (prep -> align)
job (compound -> requirements)
requirements (pobj -> with)
. (punct -> Monitor)
----------
Sentence: Prepare fabrics, leathers or other materials for processing or production by cutting, folding, organising, or undertaking processes such as washing, soaking, drying, steaming, ironing, polishing, or buffing.
Prepare (ROOT -> Prepare)
fabrics (dobj -> Prepare)
, (punct -> fabrics)
leathers (conj -> fabrics)
or (cc -> leathers)
other (amod -> materials)
materials (conj -> leathers)
for (prep -> materials)
processing (pobj -> for)
or (cc -> processing)
production (conj -> processing)
by (prep -> Prepare)
cutting (pobj -> by)
, (punct -> cutting)
folding (conj -> cutting)
, (punct -> folding)
organising (conj -> folding)
, (punct -> organising)
or (cc -> organising)
undertaking (compound -> processes)
processes (dobj -> Prepare)
such (amod -> as)
as (prep -> processes)
washing (pobj -> as)
, (punct -> washing)
soaking (conj -> washing)
, (punct -> soaking)
drying (conj -> soaking)
, (punct -> drying)
steaming (conj -> drying)
, (punct -> steaming)
ironing (conj -> steaming)
, (punct -> ironing)
polishing (conj -> ironing)
, (punct -> polishing)
or (cc -> polishing)
buffing (conj -> polishing)
. (punct -> Prepare)
----------
Sentence: This may be done in order to ensure materials meet required production specifications such as size or shape, or to pre-shrink fabrics, clean materials, ensure colours do not run in the wash after production, and to soften and smooth materials.
This (nsubjpass -> done)
may (aux -> done)
be (auxpass -> done)
done (ROOT -> done)
in (prep -> done)
order (pobj -> in)
to (aux -> ensure)
ensure (acl -> order)
materials (nsubj -> meet)
meet (ccomp -> ensure)
required (amod -> specifications)
production (compound -> specifications)
specifications (dobj -> meet)
such (amod -> as)
as (prep -> specifications)
size (pobj -> as)
or (cc -> size)
shape (conj -> size)
, (punct -> size)
or (cc -> size)
to (conj -> size)
pre (amod -> fabrics)
- (amod -> fabrics)
shrink (amod -> fabrics)
fabrics (pobj -> to)
, (punct -> fabrics)
clean (amod -> materials)
materials (conj -> size)
, (punct -> done)
ensure (conj -> done)
colours (nsubj -> run)
do (aux -> run)
not (neg -> run)
run (ccomp -> ensure)
in (prep -> run)
the (det -> wash)
wash (pobj -> in)
after (prep -> run)
production (pobj -> after)
, (punct -> run)
and (cc -> run)
to (aux -> soften)
soften (conj -> run)
and (cc -> soften)
smooth (amod -> materials)
materials (conj -> soften)
. (punct -> done)
----------
Sentence: Join fabric or materials together using needles, threads and hand techniques or sewing machines.
Join (ROOT -> Join)
fabric (dobj -> Join)
or (cc -> fabric)
materials (conj -> fabric)
together (advmod -> using)
using (advcl -> Join)
needles (dobj -> using)
, (punct -> needles)
threads (conj -> needles)
and (cc -> threads)
hand (compound -> techniques)
techniques (conj -> threads)
or (cc -> techniques)
sewing (conj -> techniques)
machines (appos -> needles)
. (punct -> Join)
----------
Sentence: Follow patterns, instructions, design or work specifications, maintain appropriate tension, and select appropriate tools according to material requirements.
Follow (ROOT -> Follow)
patterns (dobj -> Follow)
, (punct -> patterns)
instructions (conj -> patterns)
, (punct -> instructions)
design (nmod -> specifications)
or (cc -> design)
work (conj -> design)
specifications (conj -> instructions)
, (punct -> Follow)
maintain (conj -> Follow)
appropriate (amod -> tension)
tension (dobj -> maintain)
, (punct -> maintain)
and (cc -> maintain)
select (conj -> maintain)
appropriate (amod -> tools)
tools (dobj -> select)
according (prep -> select)
to (prep -> according)
material (compound -> requirements)
requirements (pobj -> to)
. (punct -> Follow)
----------
Sentence: Use appropriate measuring tools to mark reference points, cutting lines, or other indicators on materials for the purpose of adhering to blueprints or designs, recording measurements, or establishing accurate positioning or alignment of components during fabrication, construction or assembly.
Use (ROOT -> Use)
appropriate (amod -> tools)
measuring (compound -> tools)
tools (dobj -> Use)
to (aux -> mark)
mark (xcomp -> Use)
reference (compound -> points)
points (dobj -> mark)
, (punct -> mark)
cutting (advcl -> mark)
lines (dobj -> cutting)
, (punct -> lines)
or (cc -> lines)
other (amod -> indicators)
indicators (conj -> lines)
on (prep -> indicators)
materials (pobj -> on)
for (prep -> materials)
the (det -> purpose)
purpose (pobj -> for)
of (prep -> purpose)
adhering (pcomp -> of)
to (prep -> adhering)
blueprints (pobj -> to)
or (cc -> blueprints)
designs (conj -> blueprints)
, (punct -> blueprints)
recording (amod -> measurements)
measurements (appos -> blueprints)
, (punct -> cutting)
or (cc -> cutting)
establishing (conj -> cutting)
accurate (amod -> positioning)
positioning (dobj -> establishing)
or (cc -> positioning)
alignment (conj -> positioning)
of (prep -> alignment)
components (pobj -> of)
during (prep -> components)
fabrication (pobj -> during)
, (punct -> fabrication)
construction (conj -> fabrication)
or (cc -> construction)
assembly (conj -> construction)
. (punct -> Use)
----------
Sentence: Clean carpets, rugs, upholstery or drapery in order to maintain cleanliness, appearance, lifespan or functionality.
Clean (amod -> carpets)
carpets (ROOT -> carpets)
, (punct -> carpets)
rugs (conj -> carpets)
, (punct -> rugs)
upholstery (conj -> rugs)
or (cc -> upholstery)
drapery (conj -> upholstery)
in (prep -> carpets)
order (pobj -> in)
to (aux -> maintain)
maintain (acl -> order)
cleanliness (dobj -> maintain)
, (punct -> cleanliness)
appearance (conj -> cleanliness)
, (punct -> appearance)
lifespan (conj -> appearance)
or (cc -> lifespan)
functionality (conj -> lifespan)
. (punct -> carpets)
----------
Sentence: This may involve dusting, shaking, vacuuming, shampooing materials or applying other topical agents in order to remove stains, dust, dirt, and other contaminants.
This (nsubj -> involve)
may (aux -> involve)
involve (ROOT -> involve)
dusting (xcomp -> involve)
, (punct -> dusting)
shaking (conj -> dusting)
, (punct -> shaking)
vacuuming (conj -> shaking)
, (punct -> vacuuming)
shampooing (conj -> vacuuming)
materials (dobj -> shampooing)
or (cc -> shampooing)
applying (conj -> shampooing)
other (amod -> agents)
topical (amod -> agents)
agents (dobj -> applying)
in (prep -> involve)
order (pobj -> in)
to (aux -> remove)
remove (acl -> order)
stains (dobj -> remove)
, (punct -> stains)
dust (conj -> stains)
, (punct -> dust)
dirt (conj -> dust)
, (punct -> dirt)
and (cc -> dirt)
other (amod -> contaminants)
contaminants (conj -> dirt)
. (punct -> involve)
----------
Sentence: Choose cleaning products and methods in accordance with the requirements or sensitivities of the surface or material, according to manufacturer’s recommendations, established work procedures or best practice, and align with work health and safety requirements including for the use of hazardous substances.
Choose (ROOT -> Choose)
cleaning (xcomp -> Choose)
products (dobj -> cleaning)
and (cc -> products)
methods (conj -> products)
in (prep -> cleaning)
accordance (pobj -> in)
with (prep -> accordance)
the (det -> requirements)
requirements (pobj -> with)
or (cc -> requirements)
sensitivities (conj -> requirements)
of (prep -> requirements)
the (det -> surface)
surface (pobj -> of)
or (cc -> surface)
material (conj -> surface)
, (punct -> Choose)
according (prep -> Choose)
to (prep -> according)
manufacturer (poss -> recommendations)
’s (case -> manufacturer)
recommendations (pobj -> to)
, (punct -> Choose)
established (conj -> Choose)
work (compound -> procedures)
procedures (dobj -> established)
or (cc -> procedures)
best (amod -> practice)
practice (conj -> procedures)
, (punct -> established)
and (cc -> established)
align (conj -> established)
with (prep -> align)
work (nmod -> requirements)
health (nmod -> requirements)
and (cc -> health)
safety (conj -> health)
requirements (pobj -> with)
including (prep -> requirements)
for (prep -> including)
the (det -> use)
use (pobj -> for)
of (prep -> use)
hazardous (amod -> substances)
substances (pobj -> of)
. (punct -> Choose)
----------
Sentence: Form specific shapes, patterns, textures or other decorative designs on the surfaces or edges of wooden objects or structures.
Form (ROOT -> Form)
specific (amod -> shapes)
shapes (dobj -> Form)
, (punct -> shapes)
patterns (conj -> shapes)
, (punct -> patterns)
textures (conj -> patterns)
or (cc -> textures)
other (amod -> designs)
decorative (amod -> designs)
designs (conj -> textures)
on (prep -> shapes)
the (det -> surfaces)
surfaces (pobj -> on)
or (cc -> surfaces)
edges (conj -> surfaces)
of (prep -> surfaces)
wooden (amod -> objects)
objects (pobj -> of)
or (cc -> objects)
structures (conj -> objects)
. (punct -> Form)
----------
Sentence: This may involve the use of hand or power tools and equipment, understanding the properties of various wood types and selecting tools or techniques accordingly, positioning and securing shaping equipment, checking blades or abrasive attachments for sharpness and alignment, replacing components when they are no longer safe for use, and reapplying finishes or paint on wooden surfaces.
This (nsubj -> involve)
may (aux -> involve)
involve (ROOT -> involve)
the (det -> use)
use (dobj -> involve)
of (prep -> use)
hand (nmod -> tools)
or (cc -> hand)
power (conj -> hand)
tools (pobj -> of)
and (cc -> tools)
equipment (conj -> tools)
, (punct -> involve)
understanding (advcl -> involve)
the (det -> properties)
properties (dobj -> understanding)
of (prep -> properties)
various (amod -> types)
wood (compound -> types)
types (pobj -> of)
and (cc -> types)
selecting (conj -> understanding)
tools (dobj -> selecting)
or (cc -> tools)
techniques (conj -> tools)
accordingly (advmod -> selecting)
, (punct -> selecting)
positioning (conj -> selecting)
and (cc -> positioning)
securing (conj -> positioning)
shaping (compound -> equipment)
equipment (dobj -> securing)
, (punct -> positioning)
checking (compound -> blades)
blades (conj -> positioning)
or (cc -> blades)
abrasive (amod -> attachments)
attachments (conj -> blades)
for (prep -> positioning)
sharpness (pobj -> for)
and (cc -> sharpness)
alignment (conj -> sharpness)
, (punct -> selecting)
replacing (conj -> selecting)
components (dobj -> replacing)
when (advmod -> are)
they (nsubj -> are)
are (advcl -> replacing)
no (neg -> longer)
longer (advmod -> are)
safe (acomp -> are)
for (prep -> safe)
use (pobj -> for)
, (punct -> replacing)
and (cc -> replacing)
reapplying (conj -> replacing)
finishes (dobj -> reapplying)
or (cc -> finishes)
paint (conj -> finishes)
on (prep -> paint)
wooden (amod -> surfaces)
surfaces (pobj -> on)
. (punct -> involve)
----------
Sentence: Monitor the jig or shape and adjust to ensure desired outcomes, and review designs or plans to confirm outcomes align with job requirements.
Monitor (ROOT -> Monitor)
the (det -> jig)
jig (dobj -> Monitor)
or (cc -> jig)
shape (conj -> jig)
and (cc -> Monitor)
adjust (conj -> Monitor)
to (aux -> ensure)
ensure (advcl -> adjust)
desired (amod -> outcomes)
outcomes (dobj -> ensure)
, (punct -> Monitor)
and (cc -> Monitor)
review (conj -> Monitor)
designs (dobj -> review)
or (cc -> designs)
plans (conj -> designs)
to (aux -> confirm)
confirm (xcomp -> plans)
outcomes (dobj -> confirm)
align (ccomp -> confirm)
with (prep -> align)
job (compound -> requirements)
requirements (pobj -> with)
. (punct -> Monitor)
----------
Sentence: Have discussions with customers, clients, or designers to determine or check the features, details, requirements, preferences, expectations, and other specifications of a product, good, or service.
Have (ROOT -> Have)
discussions (dobj -> Have)
with (prep -> discussions)
customers (pobj -> with)
, (punct -> customers)
clients (conj -> customers)
, (punct -> clients)
or (cc -> clients)
designers (conj -> clients)
to (aux -> determine)
determine (acl -> discussions)
or (cc -> determine)
check (conj -> determine)
the (det -> features)
features (dobj -> check)
, (punct -> features)
details (conj -> features)
, (punct -> details)
requirements (conj -> details)
, (punct -> requirements)
preferences (conj -> requirements)
, (punct -> preferences)
expectations (conj -> preferences)
, (punct -> expectations)
and (cc -> expectations)
other (amod -> specifications)
specifications (conj -> expectations)
of (prep -> specifications)
a (det -> product)
product (pobj -> of)
, (punct -> product)
good (conj -> product)
, (punct -> good)
or (cc -> good)
service (conj -> good)
. (punct -> Have)
----------
Sentence: This may involve negotiating to agree on details that are reasonable for applicable timeframes, budgets, standards, or safety requirements; presenting updates, design modifications or progress; explaining processes or procedures; and ensuring final output meets needs or requests.
This (nsubj -> involve)
may (aux -> involve)
involve (ccomp -> meets)
negotiating (xcomp -> involve)
to (aux -> agree)
agree (xcomp -> negotiating)
on (prep -> agree)
details (pobj -> on)
that (nsubj -> are)
are (relcl -> details)
reasonable (acomp -> are)
for (prep -> reasonable)
applicable (amod -> timeframes)
timeframes (pobj -> for)
, (punct -> timeframes)
budgets (conj -> timeframes)
, (punct -> budgets)
standards (conj -> budgets)
, (punct -> standards)
or (cc -> standards)
safety (compound -> requirements)
requirements (conj -> standards)
; (punct -> meets)
presenting (amod -> updates)
updates (nsubj -> meets)
, (punct -> updates)
design (compound -> modifications)
modifications (conj -> updates)
or (cc -> modifications)
progress (conj -> modifications)
; (punct -> updates)
explaining (csubj -> meets)
processes (dobj -> explaining)
or (cc -> processes)
procedures (conj -> processes)
; (punct -> explaining)
and (cc -> explaining)
ensuring (conj -> explaining)
final (amod -> output)
output (dobj -> ensuring)
meets (ROOT -> meets)
needs (dobj -> meets)
or (cc -> needs)
requests (conj -> needs)
. (punct -> meets)
----------
Sentence: Combine and put together various components such as fabric panels, zippers, buttons, linings, or decorative attachments using manual or machine construction techniques in order to assemble garments or textile products.
Combine (ROOT -> Combine)
and (cc -> Combine)
put (conj -> Combine)
together (advmod -> put)
various (amod -> components)
components (dobj -> put)
such (amod -> as)
as (prep -> components)
fabric (compound -> panels)
panels (pobj -> as)
, (punct -> panels)
zippers (conj -> panels)
, (punct -> zippers)
buttons (conj -> zippers)
, (punct -> buttons)
linings (conj -> buttons)
, (punct -> linings)
or (cc -> linings)
decorative (amod -> attachments)
attachments (conj -> linings)
using (advcl -> put)
manual (amod -> techniques)
or (cc -> manual)
machine (conj -> manual)
construction (compound -> techniques)
techniques (dobj -> using)
in (prep -> put)
order (pobj -> in)
to (aux -> assemble)
assemble (acl -> order)
garments (dobj -> assemble)
or (cc -> garments)
textile (compound -> products)
products (conj -> garments)
. (punct -> Combine)
----------
Sentence: Follow work specifications or patterns in order to ensure accuracy and quality, and choose techniques and materials based on desired finished product.
Follow (ROOT -> Follow)
work (compound -> specifications)
specifications (dobj -> Follow)
or (cc -> specifications)
patterns (conj -> specifications)
in (prep -> Follow)
order (pobj -> in)
to (aux -> ensure)
ensure (acl -> order)
accuracy (dobj -> ensure)
and (cc -> accuracy)
quality (conj -> accuracy)
, (punct -> Follow)
and (cc -> Follow)
choose (conj -> Follow)
techniques (dobj -> choose)
and (cc -> techniques)
materials (conj -> techniques)
based (acl -> techniques)
on (prep -> based)
desired (amod -> product)
finished (amod -> product)
product (pobj -> on)
. (punct -> Follow)
----------
Sentence: Join textiles or other materials together through the application of adhesives.
Join (ROOT -> Join)
textiles (dobj -> Join)
or (cc -> textiles)
other (amod -> materials)
materials (conj -> textiles)
together (advmod -> Join)
through (prep -> Join)
the (det -> application)
application (pobj -> through)
of (prep -> application)
adhesives (pobj -> of)
. (punct -> Join)
----------
Sentence: Review job specifications in order to determine factors such as appropriate materials, adhesives, tools, equipment, and techniques.
Review (ROOT -> Review)
job (compound -> specifications)
specifications (dobj -> Review)
in (prep -> Review)
order (pobj -> in)
to (aux -> determine)
determine (acl -> order)
factors (dobj -> determine)
such (amod -> as)
as (prep -> factors)
appropriate (amod -> materials)
materials (pobj -> as)
, (punct -> materials)
adhesives (conj -> materials)
, (punct -> adhesives)
tools (conj -> adhesives)
, (punct -> tools)
equipment (conj -> tools)
, (punct -> equipment)
and (cc -> equipment)
techniques (conj -> equipment)
. (punct -> Review)
----------
Sentence: Follow manufacturer specifications to ensure proper application, curing, drying, and finishing techniques.
Follow (ROOT -> Follow)
manufacturer (compound -> specifications)
specifications (dobj -> Follow)
to (aux -> ensure)
ensure (advcl -> Follow)
proper (amod -> application)
application (dobj -> ensure)
, (punct -> Follow)
curing (advcl -> Follow)
, (punct -> curing)
drying (conj -> curing)
, (punct -> drying)
and (cc -> drying)
finishing (conj -> drying)
techniques (dobj -> finishing)
. (punct -> Follow)
----------
Sentence: Follow work health and safety procedures and inspect and test joins to ensure integrity and effectiveness of bond.
Follow (ROOT -> Follow)
work (nmod -> procedures)
health (nmod -> procedures)
and (cc -> health)
safety (conj -> health)
procedures (dobj -> Follow)
and (cc -> Follow)
inspect (conj -> Follow)
and (cc -> inspect)
test (conj -> inspect)
joins (conj -> Follow)
to (aux -> ensure)
ensure (advcl -> joins)
integrity (dobj -> ensure)
and (cc -> integrity)
effectiveness (conj -> integrity)
of (prep -> integrity)
bond (pobj -> of)
. (punct -> Follow)
----------
Sentence: Align or position parts or work pieces to ensure proper fit, clearance, or interconnection of components and that assembly is completed accurately in accordance with job requirements or design specifications.
Align (nmod -> parts)
or (cc -> Align)
position (conj -> Align)
parts (nsubjpass -> completed)
or (cc -> parts)
work (conj -> parts)
pieces (dobj -> work)
to (aux -> ensure)
ensure (relcl -> parts)
proper (amod -> fit)
fit (dobj -> ensure)
, (punct -> fit)
clearance (conj -> fit)
, (punct -> clearance)
or (cc -> clearance)
interconnection (conj -> clearance)
of (prep -> interconnection)
components (pobj -> of)
and (cc -> parts)
that (det -> assembly)
assembly (nsubjpass -> completed)
is (auxpass -> completed)
completed (ROOT -> completed)
accurately (advmod -> completed)
in (prep -> completed)
accordance (pobj -> in)
with (prep -> accordance)
job (compound -> requirements)
requirements (pobj -> with)
or (cc -> requirements)
design (compound -> specifications)
specifications (conj -> requirements)
. (punct -> completed)
----------
Sentence: Identify appropriate alignment, spacing, positions or measurements by reviewing job specifications or other work instructions.
Identify (ROOT -> Identify)
appropriate (amod -> alignment)
alignment (dobj -> Identify)
, (punct -> alignment)
spacing (conj -> alignment)
, (punct -> spacing)
positions (conj -> spacing)
or (cc -> positions)
measurements (conj -> positions)
by (prep -> positions)
reviewing (pcomp -> by)
job (compound -> specifications)
specifications (dobj -> reviewing)
or (cc -> specifications)
other (amod -> instructions)
work (compound -> instructions)
instructions (conj -> specifications)
. (punct -> Identify)
----------
Sentence: This may include the use of tools or guides such as templates, jigs, and measurement techniques to ensure accurate alignment.
This (nsubj -> include)
may (aux -> include)
include (ROOT -> include)
the (det -> use)
use (dobj -> include)
of (prep -> use)
tools (pobj -> of)
or (cc -> tools)
guides (conj -> tools)
such (amod -> as)
as (prep -> guides)
templates (pobj -> as)
, (punct -> templates)
jigs (conj -> templates)
, (punct -> jigs)
and (cc -> jigs)
measurement (compound -> techniques)
techniques (conj -> jigs)
to (aux -> ensure)
ensure (relcl -> use)
accurate (amod -> alignment)
alignment (dobj -> ensure)
. (punct -> include)
----------
Sentence: Check alignment against job requirements, making adjustments as necessary.
Check (ROOT -> Check)
alignment (dobj -> Check)
against (prep -> alignment)
job (compound -> requirements)
requirements (pobj -> against)
, (punct -> Check)
making (advcl -> Check)
adjustments (dobj -> making)
as (prep -> making)
necessary (amod -> as)
. (punct -> Check)
----------
Sentence: Fabricate and repair canvas and related products such as awnings, tents, tarpaulins, sails, and caravan annexes.
Fabricate (ROOT -> Fabricate)
and (cc -> Fabricate)
repair (conj -> Fabricate)
canvas (dobj -> repair)
and (cc -> canvas)
related (amod -> products)
products (conj -> canvas)
such (amod -> as)
as (prep -> products)
awnings (pobj -> as)
, (punct -> awnings)
tents (conj -> awnings)
, (punct -> tents)
tarpaulins (conj -> tents)
, (punct -> tarpaulins)
sails (conj -> tarpaulins)
, (punct -> sails)
and (cc -> sails)
caravan (compound -> annexes)
annexes (conj -> sails)
. (punct -> Fabricate)
----------
Sentence: Review job requirements and specifications in order to select appropriate materials, tools and equipment, or techniques.
Review (ROOT -> Review)
job (compound -> requirements)
requirements (dobj -> Review)
and (cc -> requirements)
specifications (conj -> requirements)
in (prep -> Review)
order (pobj -> in)
to (aux -> select)
select (acl -> order)
appropriate (amod -> materials)
materials (dobj -> select)
, (punct -> materials)
tools (conj -> materials)
and (cc -> tools)
equipment (conj -> tools)
, (punct -> tools)
or (cc -> tools)
techniques (conj -> tools)
. (punct -> Review)
----------
Sentence: This may involve tasks such as measuring, marking up, or cutting materials; or applying canvas sewing or repair techniques.
This (nsubj -> involve)
may (aux -> involve)
involve (ROOT -> involve)
tasks (dobj -> involve)
such (amod -> as)
as (prep -> tasks)
measuring (pcomp -> as)
, (punct -> measuring)
marking (conj -> measuring)
up (prt -> marking)
, (punct -> marking)
or (cc -> marking)
cutting (conj -> marking)
materials (dobj -> cutting)
; (punct -> involve)
or (cc -> involve)
applying (conj -> involve)
canvas (compound -> sewing)
sewing (nmod -> techniques)
or (cc -> sewing)
repair (conj -> sewing)
techniques (dobj -> applying)
. (punct -> involve)
----------
Sentence: Inspect finished work to ensure completed product meets workplace expectations and quality specifications, making adjustments as necessary.
Inspect (csubj -> meets)
finished (amod -> work)
work (dobj -> Inspect)
to (aux -> ensure)
ensure (advcl -> work)
completed (amod -> product)
product (nsubj -> meets)
meets (ROOT -> meets)
workplace (amod -> expectations)
expectations (dobj -> meets)
and (cc -> expectations)
quality (compound -> specifications)
specifications (conj -> expectations)
, (punct -> meets)
making (advcl -> meets)
adjustments (dobj -> making)
as (prep -> making)
necessary (amod -> as)
. (punct -> meets)
----------
Sentence: Return functionality or desired appearance to furniture by upholstering or reupholstering frames, fixing defects and damage to structures and finishes, treating warped or stained surfaces, and adjusting or replacing components such as webbing, padding, springs, and fabrics.
Return (ROOT -> Return)
functionality (dobj -> Return)
or (cc -> Return)
desired (conj -> Return)
appearance (dobj -> desired)
to (prep -> appearance)
furniture (pobj -> to)
by (prep -> desired)
upholstering (pcomp -> by)
or (cc -> upholstering)
reupholstering (conj -> upholstering)
frames (dobj -> reupholstering)
, (punct -> desired)
fixing (advcl -> desired)
defects (dobj -> fixing)
and (cc -> defects)
damage (conj -> defects)
to (prep -> defects)
structures (pobj -> to)
and (cc -> structures)
finishes (conj -> structures)
, (punct -> fixing)
treating (conj -> fixing)
warped (amod -> surfaces)
or (cc -> warped)
stained (conj -> warped)
surfaces (dobj -> treating)
, (punct -> treating)
and (cc -> treating)
adjusting (conj -> treating)
or (cc -> adjusting)
replacing (conj -> adjusting)
components (dobj -> replacing)
such (amod -> as)
as (prep -> components)
webbing (pobj -> as)
, (punct -> webbing)
padding (conj -> webbing)
, (punct -> padding)
springs (conj -> padding)
, (punct -> springs)
and (cc -> springs)
fabrics (conj -> springs)
. (punct -> Return)
----------
Sentence: Return functionality or desired appearance to furniture by upholstering or reupholstering frames, fixing defects and damage to structures and finishes, treating warped or stained surfaces, and adjusting or replacing components such as webbing, padding, springs, and fabrics.
Return (ROOT -> Return)
functionality (dobj -> Return)
or (cc -> Return)
desired (conj -> Return)
appearance (dobj -> desired)
to (prep -> appearance)
furniture (pobj -> to)
by (prep -> desired)
upholstering (pcomp -> by)
or (cc -> upholstering)
reupholstering (conj -> upholstering)
frames (dobj -> reupholstering)
, (punct -> desired)
fixing (advcl -> desired)
defects (dobj -> fixing)
and (cc -> defects)
damage (conj -> defects)
to (prep -> defects)
structures (pobj -> to)
and (cc -> structures)
finishes (conj -> structures)
, (punct -> fixing)
treating (conj -> fixing)
warped (amod -> surfaces)
or (cc -> warped)
stained (conj -> warped)
surfaces (dobj -> treating)
, (punct -> treating)
and (cc -> treating)
adjusting (conj -> treating)
or (cc -> adjusting)
replacing (conj -> adjusting)
components (dobj -> replacing)
such (amod -> as)
as (prep -> components)
webbing (pobj -> as)
, (punct -> webbing)
padding (conj -> webbing)
, (punct -> padding)
springs (conj -> padding)
, (punct -> springs)
and (cc -> springs)
fabrics (conj -> springs)
. (punct -> Return)
----------
Sentence: Position and secure materials or work pieces onto production equipment in order to facilitate, processing, manufacturing, or production.
Position (nmod -> pieces)
and (cc -> Position)
secure (conj -> Position)
materials (conj -> Position)
or (cc -> materials)
work (conj -> Position)
pieces (ROOT -> pieces)
onto (prep -> pieces)
production (compound -> equipment)
equipment (pobj -> onto)
in (prep -> pieces)
order (pobj -> in)
to (aux -> facilitate)
facilitate (acl -> order)
, (punct -> facilitate)
processing (conj -> facilitate)
, (punct -> processing)
manufacturing (conj -> processing)
, (punct -> manufacturing)
or (cc -> manufacturing)
production (conj -> manufacturing)
. (punct -> pieces)
----------
Sentence: Follow work instructions, manufacturing specifications and work health and safety guidelines in order to determine safe methods, correct positioning, and suitable securement methods including appropriate fixtures, clamps, or adhesives.
Follow (ROOT -> Follow)
work (compound -> instructions)
instructions (dobj -> Follow)
, (punct -> instructions)
manufacturing (advcl -> Follow)
specifications (dobj -> manufacturing)
and (cc -> manufacturing)
work (conj -> manufacturing)
health (nmod -> guidelines)
and (cc -> health)
safety (conj -> health)
guidelines (dobj -> work)
in (prep -> manufacturing)
order (pobj -> in)
to (aux -> determine)
determine (acl -> order)
safe (amod -> methods)
methods (dobj -> determine)
, (punct -> methods)
correct (amod -> positioning)
positioning (conj -> methods)
, (punct -> positioning)
and (cc -> positioning)
suitable (amod -> methods)
securement (amod -> methods)
methods (conj -> positioning)
including (prep -> methods)
appropriate (amod -> fixtures)
fixtures (pobj -> including)
, (punct -> fixtures)
clamps (conj -> fixtures)
, (punct -> clamps)
or (cc -> clamps)
adhesives (conj -> clamps)
. (punct -> Follow)
----------
Sentence: Inspect work and make adjustments in order to ensure safe and proper alignment.
Inspect (ROOT -> Inspect)
work (dobj -> Inspect)
and (cc -> Inspect)
make (conj -> Inspect)
adjustments (dobj -> make)
in (prep -> make)
order (pobj -> in)
to (aux -> ensure)
ensure (acl -> order)
safe (amod -> alignment)
and (cc -> safe)
proper (conj -> safe)
alignment (dobj -> ensure)
. (punct -> Inspect)
----------
Sentence: Clean carpets, rugs, upholstery or drapery in order to maintain cleanliness, appearance, lifespan or functionality.
Clean (amod -> carpets)
carpets (ROOT -> carpets)
, (punct -> carpets)
rugs (conj -> carpets)
, (punct -> rugs)
upholstery (conj -> rugs)
or (cc -> upholstery)
drapery (conj -> upholstery)
in (prep -> carpets)
order (pobj -> in)
to (aux -> maintain)
maintain (acl -> order)
cleanliness (dobj -> maintain)
, (punct -> cleanliness)
appearance (conj -> cleanliness)
, (punct -> appearance)
lifespan (conj -> appearance)
or (cc -> lifespan)
functionality (conj -> lifespan)
. (punct -> carpets)
----------
Sentence: This may involve dusting, shaking, vacuuming, shampooing materials or applying other topical agents in order to remove stains, dust, dirt, and other contaminants.
This (nsubj -> involve)
may (aux -> involve)
involve (ROOT -> involve)
dusting (xcomp -> involve)
, (punct -> dusting)
shaking (conj -> dusting)
, (punct -> shaking)
vacuuming (conj -> shaking)
, (punct -> vacuuming)
shampooing (conj -> vacuuming)
materials (dobj -> shampooing)
or (cc -> shampooing)
applying (conj -> shampooing)
other (amod -> agents)
topical (amod -> agents)
agents (dobj -> applying)
in (prep -> involve)
order (pobj -> in)
to (aux -> remove)
remove (acl -> order)
stains (dobj -> remove)
, (punct -> stains)
dust (conj -> stains)
, (punct -> dust)
dirt (conj -> dust)
, (punct -> dirt)
and (cc -> dirt)
other (amod -> contaminants)
contaminants (conj -> dirt)
. (punct -> involve)
----------
Sentence: Choose cleaning products and methods in accordance with the requirements or sensitivities of the surface or material, according to manufacturer’s recommendations, established work procedures or best practice, and align with work health and safety requirements including for the use of hazardous substances.
Choose (ROOT -> Choose)
cleaning (xcomp -> Choose)
products (dobj -> cleaning)
and (cc -> products)
methods (conj -> products)
in (prep -> cleaning)
accordance (pobj -> in)
with (prep -> accordance)
the (det -> requirements)
requirements (pobj -> with)
or (cc -> requirements)
sensitivities (conj -> requirements)
of (prep -> requirements)
the (det -> surface)
surface (pobj -> of)
or (cc -> surface)
material (conj -> surface)
, (punct -> Choose)
according (prep -> Choose)
to (prep -> according)
manufacturer (poss -> recommendations)
’s (case -> manufacturer)
recommendations (pobj -> to)
, (punct -> Choose)
established (conj -> Choose)
work (compound -> procedures)
procedures (dobj -> established)
or (cc -> procedures)
best (amod -> practice)
practice (conj -> procedures)
, (punct -> established)
and (cc -> established)
align (conj -> established)
with (prep -> align)
work (nmod -> requirements)
health (nmod -> requirements)
and (cc -> health)
safety (conj -> health)
requirements (pobj -> with)
including (prep -> requirements)
for (prep -> including)
the (det -> use)
use (pobj -> for)
of (prep -> use)
hazardous (amod -> substances)
substances (pobj -> of)
. (punct -> Choose)
----------
Sentence: Combine and put together various components such as fabric panels, zippers, buttons, linings, or decorative attachments using manual or machine construction techniques in order to assemble garments or textile products.
Combine (ROOT -> Combine)
and (cc -> Combine)
put (conj -> Combine)
together (advmod -> put)
various (amod -> components)
components (dobj -> put)
such (amod -> as)
as (prep -> components)
fabric (compound -> panels)
panels (pobj -> as)
, (punct -> panels)
zippers (conj -> panels)
, (punct -> zippers)
buttons (conj -> zippers)
, (punct -> buttons)
linings (conj -> buttons)
, (punct -> linings)
or (cc -> linings)
decorative (amod -> attachments)
attachments (conj -> linings)
using (advcl -> put)
manual (amod -> techniques)
or (cc -> manual)
machine (conj -> manual)
construction (compound -> techniques)
techniques (dobj -> using)
in (prep -> put)
order (pobj -> in)
to (aux -> assemble)
assemble (acl -> order)
garments (dobj -> assemble)
or (cc -> garments)
textile (compound -> products)
products (conj -> garments)
. (punct -> Combine)
----------
Sentence: Follow work specifications or patterns in order to ensure accuracy and quality, and choose techniques and materials based on desired finished product.
Follow (ROOT -> Follow)
work (compound -> specifications)
specifications (dobj -> Follow)
or (cc -> specifications)
patterns (conj -> specifications)
in (prep -> Follow)
order (pobj -> in)
to (aux -> ensure)
ensure (acl -> order)
accuracy (dobj -> ensure)
and (cc -> accuracy)
quality (conj -> accuracy)
, (punct -> Follow)
and (cc -> Follow)
choose (conj -> Follow)
techniques (dobj -> choose)
and (cc -> techniques)
materials (conj -> techniques)
based (acl -> techniques)
on (prep -> based)
desired (amod -> product)
finished (amod -> product)
product (pobj -> on)
. (punct -> Follow)
----------
Sentence: Fabricate and repair canvas and related products such as awnings, tents, tarpaulins, sails, and caravan annexes.
Fabricate (ROOT -> Fabricate)
and (cc -> Fabricate)
repair (conj -> Fabricate)
canvas (dobj -> repair)
and (cc -> canvas)
related (amod -> products)
products (conj -> canvas)
such (amod -> as)
as (prep -> products)
awnings (pobj -> as)
, (punct -> awnings)
tents (conj -> awnings)
, (punct -> tents)
tarpaulins (conj -> tents)
, (punct -> tarpaulins)
sails (conj -> tarpaulins)
, (punct -> sails)
and (cc -> sails)
caravan (compound -> annexes)
annexes (conj -> sails)
. (punct -> Fabricate)
----------
Sentence: Review job requirements and specifications in order to select appropriate materials, tools and equipment, or techniques.
Review (ROOT -> Review)
job (compound -> requirements)
requirements (dobj -> Review)
and (cc -> requirements)
specifications (conj -> requirements)
in (prep -> Review)
order (pobj -> in)
to (aux -> select)
select (acl -> order)
appropriate (amod -> materials)
materials (dobj -> select)
, (punct -> materials)
tools (conj -> materials)
and (cc -> tools)
equipment (conj -> tools)
, (punct -> tools)
or (cc -> tools)
techniques (conj -> tools)
. (punct -> Review)
----------
Sentence: This may involve tasks such as measuring, marking up, or cutting materials; or applying canvas sewing or repair techniques.
This (nsubj -> involve)
may (aux -> involve)
involve (ROOT -> involve)
tasks (dobj -> involve)
such (amod -> as)
as (prep -> tasks)
measuring (pcomp -> as)
, (punct -> measuring)
marking (conj -> measuring)
up (prt -> marking)
, (punct -> marking)
or (cc -> marking)
cutting (conj -> marking)
materials (dobj -> cutting)
; (punct -> involve)
or (cc -> involve)
applying (conj -> involve)
canvas (compound -> sewing)
sewing (nmod -> techniques)
or (cc -> sewing)
repair (conj -> sewing)
techniques (dobj -> applying)
. (punct -> involve)
----------
Sentence: Inspect finished work to ensure completed product meets workplace expectations and quality specifications, making adjustments as necessary.
Inspect (csubj -> meets)
finished (amod -> work)
work (dobj -> Inspect)
to (aux -> ensure)
ensure (advcl -> work)
completed (amod -> product)
product (nsubj -> meets)
meets (ROOT -> meets)
workplace (amod -> expectations)
expectations (dobj -> meets)
and (cc -> expectations)
quality (compound -> specifications)
specifications (conj -> expectations)
, (punct -> meets)
making (advcl -> meets)
adjustments (dobj -> making)
as (prep -> making)
necessary (amod -> as)
. (punct -> meets)
----------
Sentence: Adjust the alignment, positioning, or tension of fabrics or other materials during garment production or processing in order to facilitate the production process, ensure accurate cutting or stitching, or achieve desired effect.
Adjust (ROOT -> Adjust)
the (det -> alignment)
alignment (dobj -> Adjust)
, (punct -> alignment)
positioning (conj -> alignment)
, (punct -> positioning)
or (cc -> positioning)
tension (conj -> positioning)
of (prep -> tension)
fabrics (pobj -> of)
or (cc -> fabrics)
other (amod -> materials)
materials (conj -> fabrics)
during (prep -> Adjust)
garment (compound -> production)
production (pobj -> during)
or (cc -> production)
processing (conj -> production)
in (prep -> Adjust)
order (pobj -> in)
to (aux -> facilitate)
facilitate (acl -> order)
the (det -> process)
production (compound -> process)
process (dobj -> facilitate)
, (punct -> Adjust)
ensure (conj -> Adjust)
accurate (amod -> cutting)
cutting (dobj -> ensure)
or (cc -> cutting)
stitching (conj -> cutting)
, (punct -> ensure)
or (cc -> ensure)
achieve (conj -> ensure)
desired (amod -> effect)
effect (dobj -> achieve)
. (punct -> Adjust)
----------
Sentence: This may involve techniques such as stretching, folding, bending, shaping, straightening, or smoothing.
This (nsubj -> involve)
may (aux -> involve)
involve (ROOT -> involve)
techniques (dobj -> involve)
such (amod -> as)
as (prep -> techniques)
stretching (pobj -> as)
, (punct -> stretching)
folding (conj -> stretching)
, (punct -> folding)
bending (conj -> folding)
, (punct -> bending)
shaping (conj -> bending)
, (punct -> shaping)
straightening (conj -> shaping)
, (punct -> straightening)
or (cc -> straightening)
smoothing (conj -> straightening)
. (punct -> involve)
----------
Sentence: Inspect and further adjust materials as necessary to ensure the final garment meets job requirements.
Inspect (ROOT -> Inspect)
and (cc -> Inspect)
further (advmod -> adjust)
adjust (conj -> Inspect)
materials (dobj -> adjust)
as (prep -> adjust)
necessary (amod -> as)
to (aux -> ensure)
ensure (advcl -> adjust)
the (det -> garment)
final (amod -> garment)
garment (nsubj -> meets)
meets (ccomp -> ensure)
job (compound -> requirements)
requirements (dobj -> meets)
. (punct -> Inspect)
----------
Sentence: Exchange relevant knowledge, ideas, or insights with colleagues in order to support collaboration, effective work processes, informed decision-making and ongoing learning.
Exchange (advcl -> processes)
relevant (amod -> knowledge)
knowledge (dobj -> Exchange)
, (punct -> knowledge)
ideas (conj -> knowledge)
, (punct -> ideas)
or (cc -> ideas)
insights (conj -> ideas)
with (prep -> insights)
colleagues (pobj -> with)
in (prep -> Exchange)
order (pobj -> in)
to (aux -> support)
support (acl -> order)
collaboration (dobj -> support)
, (punct -> processes)
effective (amod -> processes)
work (compound -> processes)
processes (ROOT -> processes)
, (punct -> processes)
informed (amod -> making)
decision (compound -> making)
- (punct -> making)
making (conj -> processes)
and (cc -> making)
ongoing (amod -> learning)
learning (conj -> making)
. (punct -> processes)
----------
Sentence: Identify relevant information or staff members and communicate clearly or listen actively in order to ensure the effective exchange of information.
Identify (ROOT -> Identify)
relevant (amod -> information)
information (dobj -> Identify)
or (cc -> information)
staff (compound -> members)
members (conj -> information)
and (cc -> Identify)
communicate (conj -> Identify)
clearly (advmod -> communicate)
or (cc -> communicate)
listen (conj -> communicate)
actively (advmod -> listen)
in (prep -> listen)
order (pobj -> in)
to (aux -> ensure)
ensure (acl -> order)
the (det -> exchange)
effective (amod -> exchange)
exchange (dobj -> ensure)
of (prep -> exchange)
information (pobj -> of)
. (punct -> Identify)
----------
Sentence: This may involve facilitating co-design, coordinating timelines or rosters, giving, or receiving technical expertise and guidance, receiving or providing details or updates about a product or service, reviewing work activities or performance, or otherwise having discussions with colleagues about relevant information.
This (nsubj -> involve)
may (aux -> involve)
involve (ROOT -> involve)
facilitating (xcomp -> involve)
co (dobj -> facilitating)
- (dobj -> facilitating)
design (dobj -> facilitating)
, (punct -> facilitating)
coordinating (conj -> facilitating)
timelines (dobj -> coordinating)
or (cc -> timelines)
rosters (conj -> timelines)
, (punct -> coordinating)
giving (conj -> coordinating)
, (punct -> giving)
or (cc -> giving)
receiving (conj -> giving)
technical (amod -> expertise)
expertise (dobj -> receiving)
and (cc -> expertise)
guidance (conj -> expertise)
, (punct -> receiving)
receiving (conj -> receiving)
or (cc -> receiving)
providing (conj -> receiving)
details (dobj -> providing)
or (cc -> details)
updates (conj -> details)
about (prep -> details)
a (det -> product)
product (pobj -> about)
or (cc -> product)
service (conj -> product)
, (punct -> receiving)
reviewing (conj -> receiving)
work (compound -> activities)
activities (dobj -> reviewing)
or (cc -> activities)
performance (conj -> activities)
, (punct -> reviewing)
or (cc -> reviewing)
otherwise (advmod -> having)
having (conj -> reviewing)
discussions (dobj -> having)
with (prep -> discussions)
colleagues (pobj -> with)
about (prep -> colleagues)
relevant (amod -> information)
information (pobj -> about)
. (punct -> involve)
----------
Sentence: Return functionality, structural integrity or desired appearance to textiles or apparel by mending damage (including sewing, patching, refinishing leather) or replacing components (such as zips, buttons, or heels).
Return (ROOT -> Return)
functionality (dobj -> Return)
, (punct -> functionality)
structural (amod -> integrity)
integrity (conj -> functionality)
or (cc -> integrity)
desired (amod -> appearance)
appearance (conj -> integrity)
to (prep -> appearance)
textiles (pobj -> to)
or (cc -> textiles)
apparel (conj -> textiles)
by (prep -> Return)
mending (pcomp -> by)
damage (dobj -> mending)
( (punct -> damage)
including (prep -> damage)
sewing (pobj -> including)
, (punct -> sewing)
patching (conj -> sewing)
, (punct -> patching)
refinishing (amod -> leather)
leather (appos -> sewing)
) (punct -> sewing)
or (cc -> sewing)
replacing (conj -> sewing)
components (dobj -> replacing)
( (punct -> components)
such (amod -> as)
as (prep -> components)
zips (pobj -> as)
, (punct -> zips)
buttons (conj -> zips)
, (punct -> buttons)
or (cc -> buttons)
heels (conj -> buttons)
) (punct -> Return)
. (punct -> Return)
----------
Sentence: Read, interpret, and understand work documentation such as reports, designs, blueprints, specifications, work orders, technical information, or other instructions to determine work requirements.
Read (ROOT -> Read)
, (punct -> Read)
interpret (conj -> Read)
, (punct -> interpret)
and (cc -> interpret)
understand (conj -> interpret)
work (compound -> documentation)
documentation (dobj -> understand)
such (amod -> as)
as (prep -> documentation)
reports (pobj -> as)
, (punct -> reports)
designs (conj -> reports)
, (punct -> designs)
blueprints (conj -> designs)
, (punct -> blueprints)
specifications (conj -> blueprints)
, (punct -> specifications)
work (compound -> orders)
orders (conj -> specifications)
, (punct -> orders)
technical (amod -> information)
information (conj -> orders)
, (punct -> information)
or (cc -> information)
other (amod -> instructions)
instructions (conj -> information)
to (aux -> determine)
determine (acl -> instructions)
work (compound -> requirements)
requirements (dobj -> determine)
. (punct -> Read)
----------
Sentence: These may include the required materials, resources, equipment, tools, machinery, timeframes, dependencies, procedures, processes, sequences, or methods to deliver the required outcome.
These (nsubj -> include)
may (aux -> include)
include (ROOT -> include)
the (det -> materials)
required (amod -> materials)
materials (dobj -> include)
, (punct -> materials)
resources (conj -> materials)
, (punct -> resources)
equipment (conj -> resources)
, (punct -> equipment)
tools (conj -> equipment)
, (punct -> tools)
machinery (conj -> tools)
, (punct -> machinery)
timeframes (conj -> machinery)
, (punct -> timeframes)
dependencies (conj -> timeframes)
, (punct -> dependencies)
procedures (conj -> dependencies)
, (punct -> procedures)
processes (conj -> procedures)
, (punct -> processes)
sequences (conj -> processes)
, (punct -> sequences)
or (cc -> sequences)
methods (conj -> sequences)
to (aux -> deliver)
deliver (xcomp -> include)
the (det -> outcome)
required (amod -> outcome)
outcome (dobj -> deliver)
. (punct -> include)
----------
Sentence: Estimate the costs of goods, services, or materials by considering factors such as labour, production, transportation, or procurement expenses.
Estimate (ROOT -> Estimate)
the (det -> costs)
costs (dobj -> Estimate)
of (prep -> costs)
goods (pobj -> of)
, (punct -> goods)
services (conj -> goods)
, (punct -> services)
or (cc -> services)
materials (conj -> services)
by (prep -> Estimate)
considering (pcomp -> by)
factors (dobj -> considering)
such (amod -> as)
as (prep -> factors)
labour (pobj -> as)
, (punct -> labour)
production (conj -> labour)
, (punct -> production)
transportation (conj -> production)
, (punct -> transportation)
or (cc -> transportation)
procurement (compound -> expenses)
expenses (conj -> transportation)
. (punct -> Estimate)
----------
Sentence: Utilise cost estimation techniques, undertake research, and consider recent trends and historical data to develop accurate and realistic cost projections.
Utilise (compound -> techniques)
cost (compound -> techniques)
estimation (compound -> techniques)
techniques (nsubj -> undertake)
, (punct -> techniques)
undertake (ROOT -> undertake)
research (dobj -> undertake)
, (punct -> undertake)
and (cc -> undertake)
consider (conj -> undertake)
recent (amod -> trends)
trends (dobj -> consider)
and (cc -> trends)
historical (amod -> data)
data (conj -> trends)
to (aux -> develop)
develop (advcl -> consider)
accurate (amod -> projections)
and (cc -> accurate)
realistic (conj -> accurate)
cost (compound -> projections)
projections (dobj -> develop)
. (punct -> undertake)
----------
Sentence: Record operational or production data, identifying and capturing relevant information accurately and systemically, to enable the monitoring, control, or improvement of processes or to meet reporting and record keeping requirements.
Record (nmod -> data)
operational (amod -> data)
or (cc -> operational)
production (conj -> operational)
data (ROOT -> data)
, (punct -> data)
identifying (acl -> data)
and (cc -> identifying)
capturing (conj -> identifying)
relevant (amod -> information)
information (dobj -> capturing)
accurately (advmod -> capturing)
and (cc -> accurately)
systemically (conj -> accurately)
, (punct -> data)
to (aux -> enable)
enable (relcl -> data)
the (det -> monitoring)
monitoring (dobj -> enable)
, (punct -> monitoring)
control (conj -> monitoring)
, (punct -> control)
or (cc -> control)
improvement (conj -> control)
of (prep -> improvement)
processes (pobj -> of)
or (cc -> enable)
to (aux -> meet)
meet (conj -> enable)
reporting (dobj -> meet)
and (cc -> reporting)
record (compound -> keeping)
keeping (compound -> requirements)
requirements (conj -> reporting)
. (punct -> data)
----------
Sentence: This may include the use of industry-specific technical equipment or software.
This (nsubj -> include)
may (aux -> include)
include (ROOT -> include)
the (det -> use)
use (dobj -> include)
of (prep -> use)
industry (npadvmod -> specific)
- (punct -> specific)
specific (amod -> equipment)
technical (amod -> equipment)
equipment (pobj -> of)
or (cc -> equipment)
software (conj -> equipment)
. (punct -> include)
----------
Sentence: Attach decorative or functional accessories or fittings (such as buttons, handles, hooks, or zippers) to products in order to enhance usability or functionality, support accessibility needs, or create an aesthetically pleasing effect.
Attach (ROOT -> Attach)
decorative (amod -> accessories)
or (cc -> decorative)
functional (conj -> decorative)
accessories (dobj -> Attach)
or (cc -> accessories)
fittings (conj -> accessories)
( (punct -> fittings)
such (amod -> as)
as (prep -> fittings)
buttons (pobj -> as)
, (punct -> buttons)
handles (conj -> buttons)
, (punct -> handles)
hooks (conj -> handles)
, (punct -> hooks)
or (cc -> hooks)
zippers (conj -> hooks)
) (punct -> accessories)
to (prep -> Attach)
products (pobj -> to)
in (prep -> Attach)
order (pobj -> in)
to (aux -> enhance)
enhance (acl -> order)
usability (dobj -> enhance)
or (cc -> usability)
functionality (conj -> usability)
, (punct -> Attach)
support (conj -> Attach)
accessibility (compound -> needs)
needs (dobj -> support)
, (punct -> support)
or (cc -> support)
create (conj -> support)
an (det -> effect)
aesthetically (advmod -> pleasing)
pleasing (amod -> effect)
effect (dobj -> create)
. (punct -> Attach)
----------
Sentence: This may involve reviewing designs, conferring with customers to review their needs or preferences, selecting appropriate accessories and methods for attachment (such as adhesives, fastenings, welding, or stitching), and checking products to ensure functionality and quality.
This (nsubj -> involve)
may (aux -> involve)
involve (ROOT -> involve)
reviewing (amod -> designs)
designs (dobj -> involve)
, (punct -> involve)
conferring (advcl -> involve)
with (prep -> conferring)
customers (pobj -> with)
to (aux -> review)
review (advcl -> conferring)
their (poss -> needs)
needs (dobj -> review)
or (cc -> needs)
preferences (conj -> needs)
, (punct -> review)
selecting (conj -> conferring)
appropriate (amod -> accessories)
accessories (dobj -> selecting)
and (cc -> accessories)
methods (conj -> accessories)
for (prep -> accessories)
attachment (pobj -> for)
( (punct -> accessories)
such (amod -> as)
as (prep -> accessories)
adhesives (pobj -> as)
, (punct -> adhesives)
fastenings (conj -> adhesives)
, (punct -> fastenings)
welding (conj -> fastenings)
, (punct -> welding)
or (cc -> welding)
stitching (conj -> welding)
) (punct -> accessories)
, (punct -> involve)
and (cc -> involve)
checking (conj -> involve)
products (dobj -> checking)
to (aux -> ensure)
ensure (advcl -> checking)
functionality (dobj -> ensure)
and (cc -> functionality)
quality (conj -> functionality)
. (punct -> involve)
----------
Sentence: Design and fabricate work aids, for example, patterns, templates, fixtures, or jigs, to support the production process and ensure accuracy and quality of work.
Design (ROOT -> Design)
and (cc -> Design)
fabricate (conj -> Design)
work (compound -> aids)
aids (dobj -> fabricate)
, (punct -> Design)
for (prep -> patterns)
example (pobj -> for)
, (punct -> patterns)
patterns (conj -> Design)
, (punct -> patterns)
templates (conj -> patterns)
, (punct -> templates)
fixtures (conj -> templates)
, (punct -> fixtures)
or (cc -> fixtures)
jigs (conj -> fixtures)
, (punct -> Design)
to (aux -> support)
support (advcl -> Design)
the (det -> process)
production (compound -> process)
process (dobj -> support)
and (cc -> support)
ensure (conj -> support)
accuracy (dobj -> ensure)
and (cc -> accuracy)
quality (conj -> accuracy)
of (prep -> accuracy)
work (pobj -> of)
. (punct -> Design)
----------
Sentence: Interpret requirements and specifications for work pieces in order to determine the required measurements, patterns, tools, equipment, materials, and instructions.
Interpret (ROOT -> Interpret)
requirements (dobj -> Interpret)
and (cc -> requirements)
specifications (conj -> requirements)
for (prep -> requirements)
work (compound -> pieces)
pieces (pobj -> for)
in (prep -> Interpret)
order (pobj -> in)
to (aux -> determine)
determine (acl -> order)
the (det -> measurements)
required (amod -> measurements)
measurements (dobj -> determine)
, (punct -> measurements)
patterns (conj -> measurements)
, (punct -> patterns)
tools (conj -> patterns)
, (punct -> tools)
equipment (conj -> tools)
, (punct -> equipment)
materials (conj -> equipment)
, (punct -> materials)
and (cc -> materials)
instructions (conj -> materials)
. (punct -> Interpret)
----------
Sentence: This may include creating written or visual instructions to support task performance and ensure consistent outcomes.
This (nsubj -> include)
may (aux -> include)
include (ROOT -> include)
creating (xcomp -> include)
written (amod -> instructions)
or (cc -> written)
visual (conj -> written)
instructions (dobj -> creating)
to (aux -> support)
support (advcl -> creating)
task (compound -> performance)
performance (dobj -> support)
and (cc -> support)
ensure (conj -> support)
consistent (amod -> outcomes)
outcomes (dobj -> ensure)
. (punct -> include)
----------
Sentence: Review work aids against specifications, making alterations as necessary.
Review (nsubj -> work)
work (ROOT -> work)
aids (dobj -> work)
against (prep -> aids)
specifications (pobj -> against)
, (punct -> work)
making (advcl -> work)
alterations (dobj -> making)
as (prep -> making)
necessary (amod -> as)
. (punct -> work)
----------
Sentence: Cut and trim fabrics, textiles, leather or hide, using appropriate techniques and tools based on material requirements.
Cut (ROOT -> Cut)
and (cc -> Cut)
trim (conj -> Cut)
fabrics (dobj -> trim)
, (punct -> fabrics)
textiles (conj -> fabrics)
, (punct -> textiles)
leather (conj -> textiles)
or (cc -> leather)
hide (conj -> leather)
, (punct -> Cut)
using (advcl -> Cut)
appropriate (amod -> techniques)
techniques (dobj -> using)
and (cc -> techniques)
tools (conj -> techniques)
based (acl -> techniques)
on (prep -> based)
material (compound -> requirements)
requirements (pobj -> on)
. (punct -> Cut)
----------
Sentence: Account for factors such as material type, thickness, flexibility, or texture.
Account (ROOT -> Account)
for (prep -> Account)
factors (pobj -> for)
such (amod -> as)
as (prep -> factors)
material (compound -> type)
type (pobj -> as)
, (punct -> type)
thickness (conj -> type)
, (punct -> thickness)
flexibility (conj -> thickness)
, (punct -> flexibility)
or (cc -> flexibility)
texture (conj -> flexibility)
. (punct -> Account)
----------
Sentence: Follow measurements, patterns, or templates to ensure materials have the correct dimensions for use, installation, or further processing, meet quality requirements and to fix any deficiencies.
Follow (amod -> measurements)
measurements (nsubj -> have)
, (punct -> measurements)
patterns (conj -> measurements)
, (punct -> patterns)
or (cc -> patterns)
templates (conj -> patterns)
to (aux -> ensure)
ensure (relcl -> measurements)
materials (dobj -> ensure)
have (ROOT -> have)
the (det -> dimensions)
correct (amod -> dimensions)
dimensions (dobj -> have)
for (prep -> dimensions)
use (pobj -> for)
, (punct -> use)
installation (conj -> use)
, (punct -> installation)
or (cc -> installation)
further (amod -> processing)
processing (conj -> installation)
, (punct -> have)
meet (conj -> have)
quality (compound -> requirements)
requirements (dobj -> meet)
and (cc -> meet)
to (aux -> fix)
fix (conj -> meet)
any (det -> deficiencies)
deficiencies (dobj -> fix)
. (punct -> have)
----------
Sentence: Layout and mark guidelines on materials or workpieces to ensure accurate and consistent placements, measurements, and cuts.
Layout (ROOT -> Layout)
and (cc -> Layout)
mark (conj -> Layout)
guidelines (dobj -> mark)
on (prep -> guidelines)
materials (pobj -> on)
or (cc -> materials)
workpieces (conj -> materials)
to (aux -> ensure)
ensure (advcl -> mark)
accurate (amod -> placements)
and (cc -> accurate)
consistent (conj -> accurate)
placements (dobj -> ensure)
, (punct -> placements)
measurements (conj -> placements)
, (punct -> measurements)
and (cc -> measurements)
cuts (conj -> measurements)
. (punct -> Layout)
----------
Sentence: Utilise patterns, templates, blueprints, or sketches to ensure work is produced in accordance with job specifications.
Utilise (compound -> patterns)
patterns (ROOT -> patterns)
, (punct -> patterns)
templates (conj -> patterns)
, (punct -> templates)
blueprints (conj -> templates)
, (punct -> blueprints)
or (cc -> blueprints)
sketches (conj -> blueprints)
to (aux -> ensure)
ensure (xcomp -> sketches)
work (nsubjpass -> produced)
is (auxpass -> produced)
produced (ccomp -> ensure)
in (prep -> produced)
accordance (pobj -> in)
with (prep -> accordance)
job (compound -> specifications)
specifications (pobj -> with)
. (punct -> patterns)
----------
Sentence: Review and maintain data in information systems or databases, ensuring that information is up to date, correct, and kept in accordance with the relevant legislation or procedures.
Review (nsubj -> ensuring)
and (cc -> Review)
maintain (conj -> Review)
data (dobj -> maintain)
in (prep -> data)
information (compound -> systems)
systems (pobj -> in)
or (cc -> systems)
databases (conj -> systems)
, (punct -> maintain)
ensuring (ROOT -> ensuring)
that (mark -> is)
information (nsubj -> is)
is (ccomp -> ensuring)
up (prep -> is)
to (prep -> up)
date (pobj -> to)
, (punct -> is)
correct (acomp -> is)
, (punct -> correct)
and (cc -> correct)
kept (conj -> correct)
in (prep -> kept)
accordance (pobj -> in)
with (prep -> accordance)
the (det -> legislation)
relevant (amod -> legislation)
legislation (pobj -> with)
or (cc -> legislation)
procedures (conj -> legislation)
. (punct -> ensuring)
----------
Sentence: This may include regulations relating to information security, privacy, reporting and record keeping.
This (nsubj -> include)
may (aux -> include)
include (ROOT -> include)
regulations (dobj -> include)
relating (acl -> regulations)
to (prep -> relating)
information (compound -> security)
security (pobj -> to)
, (punct -> security)
privacy (conj -> security)
, (punct -> privacy)
reporting (conj -> privacy)
and (cc -> reporting)
record (compound -> keeping)
keeping (conj -> reporting)
. (punct -> include)
----------
Sentence: Handle medical billing tasks, such as submitting claims, processing payments, or updating patient records, ensuring that information is accurate and in compliance with relevant regulations, industry standards, and organisational policies.
Handle (ROOT -> Handle)
medical (amod -> tasks)
billing (compound -> tasks)
tasks (dobj -> Handle)
, (punct -> tasks)
such (amod -> as)
as (prep -> tasks)
submitting (pcomp -> as)
claims (dobj -> submitting)
, (punct -> claims)
processing (compound -> payments)
payments (conj -> claims)
, (punct -> submitting)
or (cc -> submitting)
updating (conj -> submitting)
patient (amod -> records)
records (dobj -> updating)
, (punct -> Handle)
ensuring (advcl -> Handle)
that (mark -> is)
information (nsubj -> is)
is (ccomp -> ensuring)
accurate (acomp -> is)
and (cc -> accurate)
in (conj -> accurate)
compliance (pobj -> in)
with (prep -> compliance)
relevant (amod -> regulations)
regulations (pobj -> with)
, (punct -> regulations)
industry (compound -> standards)
standards (conj -> regulations)
, (punct -> standards)
and (cc -> standards)
organisational (amod -> policies)
policies (conj -> standards)
. (punct -> Handle)
----------
Sentence: Collaborate with healthcare providers, insurance companies, government agencies or service providers and patients to process payments and resolve any billing-related issues or discrepancies.
Collaborate (ROOT -> Collaborate)
with (prep -> Collaborate)
healthcare (compound -> providers)
providers (pobj -> with)
, (punct -> providers)
insurance (compound -> companies)
companies (conj -> providers)
, (punct -> companies)
government (compound -> agencies)
agencies (conj -> companies)
or (cc -> agencies)
service (compound -> providers)
providers (conj -> agencies)
and (cc -> providers)
patients (conj -> providers)
to (aux -> process)
process (advcl -> Collaborate)
payments (dobj -> process)
and (cc -> process)
resolve (conj -> process)
any (det -> issues)
billing (npadvmod -> related)
- (punct -> related)
related (amod -> issues)
issues (dobj -> resolve)
or (cc -> issues)
discrepancies (conj -> issues)
. (punct -> Collaborate)
----------
Sentence: Develop organisational standards, policies, guidelines, programs, or procedures that govern or outline expectations for outcomes, quality, priorities, safety or behaviour within an organisation, or support employees to work safely and effectively.
Develop (ROOT -> Develop)
organisational (amod -> standards)
standards (dobj -> Develop)
, (punct -> standards)
policies (conj -> standards)
, (punct -> policies)
guidelines (conj -> policies)
, (punct -> guidelines)
programs (conj -> guidelines)
, (punct -> programs)
or (cc -> programs)
procedures (conj -> programs)
that (nsubj -> govern)
govern (relcl -> policies)
or (cc -> govern)
outline (conj -> govern)
expectations (dobj -> outline)
for (prep -> expectations)
outcomes (pobj -> for)
, (punct -> outcomes)
quality (conj -> outcomes)
, (punct -> quality)
priorities (conj -> quality)
, (punct -> priorities)
safety (conj -> priorities)
or (cc -> safety)
behaviour (conj -> safety)
within (prep -> govern)
an (det -> organisation)
organisation (pobj -> within)
, (punct -> Develop)
or (cc -> Develop)
support (conj -> Develop)
employees (dobj -> support)
to (aux -> work)
work (xcomp -> support)
safely (advmod -> work)
and (cc -> safely)
effectively (conj -> safely)
. (punct -> Develop)
----------
Sentence: Ensure alignment with organisational mission, values, and strategic objectives, and compliance with relevant standards, norms, laws, and regulations.
Ensure (ROOT -> Ensure)
alignment (dobj -> Ensure)
with (prep -> alignment)
organisational (amod -> mission)
mission (pobj -> with)
, (punct -> mission)
values (conj -> mission)
, (punct -> values)
and (cc -> values)
strategic (amod -> objectives)
objectives (conj -> values)
, (punct -> objectives)
and (cc -> objectives)
compliance (conj -> objectives)
with (prep -> compliance)
relevant (amod -> standards)
standards (pobj -> with)
, (punct -> standards)
norms (conj -> standards)
, (punct -> norms)
laws (conj -> norms)
, (punct -> laws)
and (cc -> laws)
regulations (conj -> laws)
. (punct -> Ensure)
----------
Sentence: Ensure that reporting requirements are met, and mechanisms for addressing non-compliance are clear.
Ensure (ROOT -> Ensure)
that (mark -> met)
reporting (compound -> requirements)
requirements (nsubjpass -> met)
are (auxpass -> met)
met (ccomp -> Ensure)
, (punct -> met)
and (cc -> met)
mechanisms (nsubj -> are)
for (prep -> mechanisms)
addressing (pcomp -> for)
non (dobj -> addressing)
- (dobj -> addressing)
compliance (dobj -> addressing)
are (conj -> Ensure)
clear (acomp -> are)
. (punct -> are)
----------
Sentence: Coordinate with others in order to plan, organise, coordinate, conduct, and manage operational activities and ensure work activities are completed efficiently, effectively and in line with operational policies and procedures.
Coordinate (ROOT -> Coordinate)
with (prep -> Coordinate)
others (pobj -> with)
in (prep -> Coordinate)
order (pobj -> in)
to (aux -> plan)
plan (acl -> order)
, (punct -> plan)
organise (conj -> plan)
, (punct -> organise)
coordinate (conj -> organise)
, (punct -> coordinate)
conduct (conj -> coordinate)
, (punct -> conduct)
and (cc -> Coordinate)
manage (conj -> Coordinate)
operational (amod -> activities)
activities (dobj -> manage)
and (cc -> manage)
ensure (conj -> manage)
work (compound -> activities)
activities (nsubjpass -> completed)
are (auxpass -> completed)
completed (ccomp -> ensure)
efficiently (advmod -> completed)
, (punct -> completed)
effectively (advmod -> completed)
and (cc -> effectively)
in (conj -> effectively)
line (pobj -> in)
with (prep -> line)
operational (amod -> policies)
policies (pobj -> with)
and (cc -> policies)
procedures (conj -> policies)
. (punct -> Coordinate)
----------
Sentence: This may involve identifying relevant tasks, dependencies, and technical requirements, communicating expectations, coordinating or scheduling work activities and tasks, delegating responsibilities, distributing materials, providing support to colleagues using open communication, requesting or providing technical advice, addressing concerns or issues, and facilitating a cooperative work environment that fosters teamwork and problem solving.
This (nsubj -> involve)
may (aux -> involve)
involve (ROOT -> involve)
identifying (xcomp -> involve)
relevant (amod -> tasks)
tasks (dobj -> identifying)
, (punct -> tasks)
dependencies (conj -> tasks)
, (punct -> dependencies)
and (cc -> dependencies)
technical (amod -> requirements)
requirements (conj -> dependencies)
, (punct -> requirements)
communicating (advcl -> involve)
expectations (dobj -> communicating)
, (punct -> expectations)
coordinating (conj -> communicating)
or (cc -> coordinating)
scheduling (compound -> work)
work (compound -> activities)
activities (conj -> coordinating)
and (cc -> activities)
tasks (conj -> activities)
, (punct -> coordinating)
delegating (conj -> coordinating)
responsibilities (dobj -> delegating)
, (punct -> delegating)
distributing (advcl -> communicating)
materials (dobj -> distributing)
, (punct -> communicating)
providing (conj -> communicating)
support (dobj -> providing)
to (dative -> providing)
colleagues (pobj -> to)
using (acl -> colleagues)
open (amod -> communication)
communication (dobj -> using)
, (punct -> providing)
requesting (conj -> providing)
or (cc -> requesting)
providing (conj -> requesting)
technical (amod -> advice)
advice (dobj -> providing)
, (punct -> providing)
addressing (conj -> providing)
concerns (dobj -> addressing)
or (cc -> concerns)
issues (conj -> concerns)
, (punct -> providing)
and (cc -> providing)
facilitating (conj -> providing)
a (det -> environment)
cooperative (amod -> environment)
work (compound -> environment)
environment (dobj -> facilitating)
that (nsubj -> fosters)
fosters (nsubj -> teamwork)
teamwork (relcl -> environment)
and (cc -> teamwork)
problem (compound -> solving)
solving (conj -> teamwork)
. (punct -> involve)
----------
Sentence: Track and monitor resource supply, stock, use, demand, state, or quality in order to ensure the effective supply, allocation and use of materials, equipment, finances or human resources.
Track (ROOT -> Track)
and (cc -> Track)
monitor (conj -> Track)
resource (compound -> supply)
supply (dobj -> monitor)
, (punct -> supply)
stock (conj -> supply)
, (punct -> stock)
use (conj -> stock)
, (punct -> use)
demand (conj -> use)
, (punct -> demand)
state (conj -> demand)
, (punct -> state)
or (cc -> state)
quality (conj -> state)
in (prep -> monitor)
order (pobj -> in)
to (aux -> ensure)
ensure (acl -> order)
the (det -> supply)
effective (amod -> supply)
supply (dobj -> ensure)
, (punct -> supply)
allocation (conj -> supply)
and (cc -> allocation)
use (conj -> allocation)
of (prep -> allocation)
materials (pobj -> of)
, (punct -> materials)
equipment (conj -> materials)
, (punct -> equipment)
finances (conj -> equipment)
or (cc -> finances)
human (amod -> resources)
resources (conj -> finances)
. (punct -> Track)
----------
Sentence: Adjust resources and address issues or bottlenecks as required.
Adjust (ROOT -> Adjust)
resources (dobj -> Adjust)
and (cc -> resources)
address (compound -> issues)
issues (conj -> resources)
or (cc -> issues)
bottlenecks (conj -> issues)
as (mark -> required)
required (advcl -> Adjust)
. (punct -> Adjust)
----------
Sentence: Direct and oversee health care operations.
Direct (ROOT -> Direct)
and (cc -> Direct)
oversee (conj -> Direct)
health (compound -> care)
care (compound -> operations)
operations (dobj -> oversee)
. (punct -> Direct)
----------
Sentence: This may involve providing technical or specialist expertise and guidance or undertaking project management tasks such as determining and managing resourcing, scheduling, and ensuring compliance with relevant legislation, codes, and standards.
This (nsubj -> involve)
may (aux -> involve)
involve (ROOT -> involve)
providing (xcomp -> involve)
technical (amod -> expertise)
or (cc -> technical)
specialist (conj -> technical)
expertise (dobj -> providing)
and (cc -> expertise)
guidance (nmod -> tasks)
or (cc -> guidance)
undertaking (conj -> guidance)
project (compound -> management)
management (compound -> tasks)
tasks (conj -> expertise)
such (amod -> as)
as (prep -> tasks)
determining (pcomp -> as)
and (cc -> determining)
managing (conj -> determining)
resourcing (dobj -> managing)
, (punct -> resourcing)
scheduling (conj -> resourcing)
, (punct -> determining)
and (cc -> determining)
ensuring (conj -> determining)
compliance (dobj -> ensuring)
with (prep -> compliance)
relevant (amod -> legislation)
legislation (pobj -> with)
, (punct -> legislation)
codes (conj -> legislation)
, (punct -> codes)
and (cc -> codes)
standards (conj -> codes)
. (punct -> involve)
----------
Sentence: Undertake patient identification procedures to match patients to intended procedures or treatments in order to ensure patients receive the care intended for them and to prevent errors or adverse outcomes.
Undertake (ROOT -> Undertake)
patient (amod -> identification)
identification (compound -> procedures)
procedures (dobj -> Undertake)
to (aux -> match)
match (xcomp -> Undertake)
patients (dobj -> match)
to (prep -> match)
intended (amod -> procedures)
procedures (pobj -> to)
or (cc -> procedures)
treatments (conj -> procedures)
in (prep -> match)
order (pobj -> in)
to (aux -> ensure)
ensure (acl -> order)
patients (nsubj -> receive)
receive (ccomp -> ensure)
the (det -> care)
care (dobj -> receive)
intended (acl -> care)
for (prep -> intended)
them (pobj -> for)
and (cc -> ensure)
to (aux -> prevent)
prevent (conj -> ensure)
errors (dobj -> prevent)
or (cc -> errors)
adverse (amod -> outcomes)
outcomes (conj -> errors)
. (punct -> Undertake)
----------
Sentence: Follow the NSQHS Communicating for Safety Standard for correct identification and procedure matching, and develop and utilise checklists, protocols, or routines to facilitate matching.
Follow (ROOT -> Follow)
the (det -> Communicating)
NSQHS (compound -> Communicating)
Communicating (dobj -> Follow)
for (prep -> Communicating)
Safety (compound -> Standard)
Standard (pobj -> for)
for (prep -> Communicating)
correct (amod -> identification)
identification (pobj -> for)
and (cc -> identification)
procedure (compound -> matching)
matching (conj -> identification)
, (punct -> Follow)
and (cc -> Follow)
develop (conj -> Follow)
and (cc -> develop)
utilise (conj -> develop)
checklists (dobj -> utilise)
, (punct -> checklists)
protocols (conj -> checklists)
, (punct -> protocols)
or (cc -> protocols)
routines (conj -> protocols)
to (aux -> facilitate)
facilitate (relcl -> routines)
matching (dobj -> facilitate)
. (punct -> Follow)
----------
Sentence: Utilise at least three approved patient identifiers at the time of admission or registration; when matching a patient's identity to care, medicine, therapy, or services; whenever clinical handover or patient transfer occurs; whenever discharge documentation is generated; and in specific service settings.
Utilise (nsubjpass -> generated)
at (advmod -> least)
least (advmod -> three)
three (nummod -> identifiers)
approved (amod -> identifiers)
patient (compound -> identifiers)
identifiers (dobj -> Utilise)
at (prep -> identifiers)
the (det -> time)
time (pobj -> at)
of (prep -> time)
admission (pobj -> of)
or (cc -> admission)
registration (conj -> admission)
; (punct -> generated)
when (advmod -> matching)
matching (advcl -> generated)
a (det -> patient)
patient (poss -> identity)
's (case -> patient)
identity (dobj -> matching)
to (prep -> identity)
care (pobj -> to)
, (punct -> care)
medicine (conj -> care)
, (punct -> medicine)
therapy (conj -> medicine)
, (punct -> therapy)
or (cc -> therapy)
services (conj -> therapy)
; (punct -> generated)
whenever (advmod -> occurs)
clinical (amod -> handover)
handover (nsubj -> occurs)
or (cc -> handover)
patient (amod -> transfer)
transfer (conj -> handover)
occurs (advcl -> generated)
; (punct -> generated)
whenever (advmod -> generated)
discharge (compound -> documentation)
documentation (nsubjpass -> generated)
is (auxpass -> generated)
generated (ROOT -> generated)
; (punct -> generated)
and (cc -> generated)
in (conj -> generated)
specific (amod -> settings)
service (compound -> settings)
settings (pobj -> in)
. (punct -> generated)
----------
Sentence: Liaise between departments or other groups to improve function or communication and ensure the effectiveness, safety, or quality of work practices and operations.
Liaise (ROOT -> Liaise)
between (prep -> Liaise)
departments (pobj -> between)
or (cc -> departments)
other (amod -> groups)
groups (conj -> departments)
to (aux -> improve)
improve (advcl -> Liaise)
function (dobj -> improve)
or (cc -> function)
communication (conj -> function)
and (cc -> improve)
ensure (conj -> improve)
the (det -> effectiveness)
effectiveness (dobj -> ensure)
, (punct -> effectiveness)
safety (conj -> effectiveness)
, (punct -> safety)
or (cc -> safety)
quality (conj -> safety)
of (prep -> quality)
work (compound -> practices)
practices (pobj -> of)
and (cc -> practices)
operations (conj -> practices)
. (punct -> Liaise)
----------
Sentence: Establish and foster professional connections with relevant stakeholders, share knowledge and ideas, address concerns, agree on common goals, and tailor liaison methods to ensure communication is provided with consideration of the preferences and accessibility needs of others in order to ultimately improve the overall functioning of the organisation.
Establish (nsubj -> agree)
and (cc -> Establish)
foster (amod -> connections)
professional (amod -> connections)
connections (conj -> Establish)
with (prep -> connections)
relevant (amod -> stakeholders)
stakeholders (pobj -> with)
, (punct -> Establish)
share (conj -> Establish)
knowledge (dobj -> share)
and (cc -> knowledge)
ideas (conj -> knowledge)
, (punct -> knowledge)
address (compound -> concerns)
concerns (conj -> knowledge)
, (punct -> Establish)
agree (ROOT -> agree)
on (prep -> agree)
common (amod -> goals)
goals (pobj -> on)
, (punct -> agree)
and (cc -> agree)
tailor (compound -> liaison)
liaison (compound -> methods)
methods (conj -> agree)
to (aux -> ensure)
ensure (advcl -> agree)
communication (nsubjpass -> provided)
is (auxpass -> provided)
provided (ccomp -> ensure)
with (prep -> provided)
consideration (pobj -> with)
of (prep -> consideration)
the (det -> preferences)
preferences (pobj -> of)
and (cc -> preferences)
accessibility (compound -> needs)
needs (conj -> preferences)
of (prep -> preferences)
others (pobj -> of)
in (prep -> provided)
order (pobj -> in)
to (aux -> improve)
ultimately (advmod -> improve)
improve (acl -> order)
the (det -> functioning)
overall (amod -> functioning)
functioning (dobj -> improve)
of (prep -> functioning)
the (det -> organisation)
organisation (pobj -> of)
. (punct -> agree)
----------
Sentence: Record, review, and maintain allied health or social service client records, ensuring that details are current, correct, and meet legal obligations for record keeping including what information can and cannot be collected.
Record (ROOT -> Record)
, (punct -> Record)
review (appos -> Record)
, (punct -> Record)
and (cc -> Record)
maintain (conj -> Record)
allied (amod -> health)
health (dobj -> maintain)
or (cc -> health)
social (amod -> service)
service (compound -> client)
client (compound -> records)
records (conj -> health)
, (punct -> maintain)
ensuring (advcl -> maintain)
that (mark -> are)
details (nsubj -> are)
are (ccomp -> ensuring)
current (amod -> correct)
, (punct -> correct)
correct (acomp -> are)
, (punct -> correct)
and (cc -> correct)
meet (conj -> correct)
legal (amod -> obligations)
obligations (dobj -> meet)
for (prep -> obligations)
record (compound -> keeping)
keeping (pobj -> for)
including (prep -> keeping)
what (det -> information)
information (pcomp -> including)
can (pcomp -> including)
and (cc -> meet)
can (aux -> collected)
not (neg -> collected)
be (auxpass -> collected)
collected (conj -> meet)
. (punct -> Record)
----------
Sentence: Ensure that records are stored, handled, maintained, or destroyed according to information security, privacy, and other requirements, including controlled access to personal information.
Ensure (ROOT -> Ensure)
that (mark -> stored)
records (nsubjpass -> stored)
are (auxpass -> stored)
stored (ccomp -> Ensure)
, (punct -> stored)
handled (conj -> stored)
, (punct -> handled)
maintained (conj -> handled)
, (punct -> maintained)
or (cc -> maintained)
destroyed (conj -> maintained)
according (prep -> destroyed)
to (prep -> according)
information (compound -> security)
security (pobj -> to)
, (punct -> security)
privacy (conj -> security)
, (punct -> privacy)
and (cc -> privacy)
other (amod -> requirements)
requirements (conj -> privacy)
, (punct -> requirements)
including (prep -> requirements)
controlled (amod -> access)
access (pobj -> including)
to (prep -> access)
personal (amod -> information)
information (pobj -> to)
. (punct -> Ensure)
----------
Sentence: Develop budgets for projects or operations by determining budget parameters based on research, consultation, and negotiation; analysing available information; making income and expenditure estimates; and allocating financial resources in alignment with strategic goals and objectives.
Develop (ROOT -> Develop)
budgets (dobj -> Develop)
for (prep -> budgets)
projects (pobj -> for)
or (cc -> projects)
operations (conj -> projects)
by (prep -> Develop)
determining (pcomp -> by)
budget (compound -> parameters)
parameters (dobj -> determining)
based (acl -> parameters)
on (prep -> based)
research (pobj -> on)
, (punct -> research)
consultation (conj -> research)
, (punct -> consultation)
and (cc -> consultation)
negotiation (conj -> consultation)
; (punct -> Develop)
analysing (advcl -> Develop)
available (amod -> information)
information (dobj -> analysing)
; (punct -> analysing)
making (conj -> analysing)
income (nmod -> estimates)
and (cc -> income)
expenditure (conj -> income)
estimates (dobj -> making)
; (punct -> analysing)
and (cc -> analysing)
allocating (conj -> analysing)
financial (amod -> resources)
resources (dobj -> allocating)
in (prep -> allocating)
alignment (pobj -> in)
with (prep -> allocating)
strategic (amod -> goals)
goals (pobj -> with)
and (cc -> goals)
objectives (conj -> goals)
. (punct -> Develop)
----------
Sentence: Monitor financial performance and adjust budget as needed during implementation in order to ensure goals are met.
Monitor (ROOT -> Monitor)
financial (amod -> performance)
performance (dobj -> Monitor)
and (cc -> Monitor)
adjust (conj -> Monitor)
budget (dobj -> adjust)
as (mark -> needed)
needed (advcl -> adjust)
during (prep -> needed)
implementation (pobj -> during)
in (prep -> adjust)
order (pobj -> in)
to (aux -> ensure)
ensure (acl -> order)
goals (nsubjpass -> met)
are (auxpass -> met)
met (ccomp -> ensure)
. (punct -> Monitor)
----------
Sentence: Put organisational processes or policy changes into effect, implementing improvements or changes that meet the needs of the organisation, its employees, or the individuals or communities it will affect.
Put (ROOT -> Put)
organisational (amod -> processes)
processes (dobj -> Put)
or (cc -> processes)
policy (compound -> changes)
changes (conj -> processes)
into (prep -> Put)
effect (pobj -> into)
, (punct -> Put)
implementing (advcl -> Put)
improvements (dobj -> implementing)
or (cc -> improvements)
changes (conj -> improvements)
that (nsubj -> meet)
meet (relcl -> improvements)
the (det -> needs)
needs (dobj -> meet)
of (prep -> needs)
the (det -> organisation)
organisation (pobj -> of)
, (punct -> implementing)
its (poss -> employees)
employees (dobj -> implementing)
, (punct -> employees)
or (cc -> employees)
the (det -> individuals)
individuals (conj -> employees)
or (cc -> individuals)
communities (conj -> individuals)
it (nsubj -> affect)
will (aux -> affect)
affect (relcl -> employees)
. (punct -> Put)
----------
Sentence: Ensure that stakeholders are informed, engaged, and prepared to adapt to new procedures or expectations.
Ensure (ROOT -> Ensure)
that (mark -> informed)
stakeholders (nsubjpass -> informed)
are (auxpass -> informed)
informed (ccomp -> Ensure)
, (punct -> informed)
engaged (conj -> informed)
, (punct -> informed)
and (cc -> informed)
prepared (conj -> Ensure)
to (aux -> adapt)
adapt (xcomp -> prepared)
to (prep -> adapt)
new (amod -> procedures)
procedures (pobj -> to)
or (cc -> procedures)
expectations (conj -> procedures)
. (punct -> Ensure)
----------
Sentence: Monitor the impact of changes and address any issues or concerns that arise during the transition process.
Monitor (ROOT -> Monitor)
the (det -> impact)
impact (dobj -> Monitor)
of (prep -> impact)
changes (pobj -> of)
and (cc -> Monitor)
address (conj -> Monitor)
any (det -> issues)
issues (dobj -> address)
or (cc -> issues)
concerns (conj -> issues)
that (nsubj -> arise)
arise (relcl -> concerns)
during (prep -> arise)
the (det -> process)
transition (compound -> process)
process (pobj -> during)
. (punct -> Monitor)
----------
Sentence: Review and maintain operational records, for example staff, inventory, customer, operation, production, performance, financial, technical, or maintenance data.
Review (ROOT -> Review)
and (cc -> Review)
maintain (conj -> Review)
operational (amod -> records)
records (dobj -> maintain)
, (punct -> maintain)
for (prep -> maintain)
example (compound -> staff)
staff (pobj -> for)
, (punct -> staff)
inventory (conj -> staff)
, (punct -> inventory)
customer (conj -> inventory)
, (punct -> customer)
operation (conj -> customer)
, (punct -> operation)
production (conj -> operation)
, (punct -> production)
performance (conj -> production)
, (punct -> performance)
financial (amod -> data)
, (punct -> financial)
technical (conj -> financial)
, (punct -> technical)
or (cc -> technical)
maintenance (conj -> technical)
data (conj -> performance)
. (punct -> Review)
----------
Sentence: Ensure that information is correct, up to date, meets reporting and record keeping requirements, and that new information is recorded and processed accordingly.
Ensure (ROOT -> Ensure)
that (det -> information)
information (nsubj -> is)
is (ccomp -> Ensure)
correct (acomp -> is)
, (punct -> is)
up (prep -> is)
to (prep -> up)
date (pobj -> to)
, (punct -> Ensure)
meets (conj -> Ensure)
reporting (dobj -> meets)
and (cc -> reporting)
record (compound -> keeping)
keeping (compound -> requirements)
requirements (conj -> reporting)
, (punct -> meets)
and (cc -> meets)
that (mark -> recorded)
new (amod -> information)
information (nsubjpass -> recorded)
is (auxpass -> recorded)
recorded (conj -> meets)
and (cc -> recorded)
processed (conj -> recorded)
accordingly (advmod -> processed)
. (punct -> Ensure)
----------
Sentence: This may also include ensuring that records are stored or destroyed appropriately according to information security or other privacy requirements.
This (nsubj -> include)
may (aux -> include)
also (advmod -> include)
include (ROOT -> include)
ensuring (xcomp -> include)
that (mark -> stored)
records (nsubjpass -> stored)
are (auxpass -> stored)
stored (ccomp -> ensuring)
or (cc -> stored)
destroyed (conj -> stored)
appropriately (advmod -> destroyed)
according (prep -> destroyed)
to (prep -> according)
information (compound -> security)
security (pobj -> to)
or (cc -> security)
other (amod -> requirements)
privacy (compound -> requirements)
requirements (conj -> security)
. (punct -> include)
----------
Sentence: Plan, organise and oversee the delivery of health care to patients and clients.
Plan (ROOT -> Plan)
, (punct -> Plan)
organise (conj -> Plan)
and (cc -> organise)
oversee (conj -> organise)
the (det -> delivery)
delivery (dobj -> oversee)
of (prep -> delivery)
health (compound -> care)
care (pobj -> of)
to (prep -> delivery)
patients (pobj -> to)
and (cc -> patients)
clients (conj -> patients)
. (punct -> Plan)
----------
Sentence: This may include providing specialist or technical knowledge and guidance to ensure care activities are undertaken correctly, effectively, and in accordance with relevant standards, regulations, and legislation.
This (nsubj -> include)
may (aux -> include)
include (ROOT -> include)
providing (xcomp -> include)
specialist (dobj -> providing)
or (cc -> specialist)
technical (amod -> knowledge)
knowledge (conj -> specialist)
and (cc -> knowledge)
guidance (conj -> knowledge)
to (aux -> ensure)
ensure (advcl -> providing)
care (compound -> activities)
activities (nsubjpass -> undertaken)
are (auxpass -> undertaken)
undertaken (ccomp -> ensure)
correctly (advmod -> undertaken)
, (punct -> undertaken)
effectively (advmod -> undertaken)
, (punct -> undertaken)
and (cc -> include)
in (conj -> include)
accordance (pobj -> in)
with (prep -> accordance)
relevant (amod -> standards)
standards (pobj -> with)
, (punct -> standards)
regulations (conj -> standards)
, (punct -> regulations)
and (cc -> regulations)
legislation (conj -> regulations)
. (punct -> include)
----------
Sentence: It may also include undertaking general project management tasks to ensure goals, timelines and budgets are met - such as managing staff and resource allocation; providing supervision, guidance, and direction; and undertaking planning and reporting.
It (nsubj -> include)
may (aux -> include)
also (advmod -> include)
include (ROOT -> include)
undertaking (xcomp -> include)
general (amod -> tasks)
project (compound -> management)
management (compound -> tasks)
tasks (dobj -> undertaking)
to (aux -> ensure)
ensure (advcl -> undertaking)
goals (nsubjpass -> met)
, (punct -> goals)
timelines (conj -> goals)
and (cc -> timelines)
budgets (conj -> timelines)
are (auxpass -> met)
met (ccomp -> ensure)
- (punct -> met)
such (amod -> as)
as (prep -> met)
managing (pcomp -> as)
staff (nmod -> allocation)
and (cc -> staff)
resource (conj -> staff)
allocation (dobj -> managing)
; (punct -> undertaking)
providing (conj -> undertaking)
supervision (dobj -> providing)
, (punct -> supervision)
guidance (conj -> supervision)
, (punct -> guidance)
and (cc -> guidance)
direction (conj -> guidance)
; (punct -> providing)
and (cc -> providing)
undertaking (conj -> providing)
planning (dobj -> undertaking)
and (cc -> planning)
reporting (conj -> planning)
. (punct -> include)
----------
Sentence: Coordinate with external stakeholders (such as suppliers, clients, community leaders or subject matter experts) in order to organise and manage operational activities and ensure that projects, programs or initiatives are executed efficiently, effectively and respectfully.
Coordinate (ROOT -> Coordinate)
with (prep -> Coordinate)
external (amod -> stakeholders)
stakeholders (pobj -> with)
( (punct -> stakeholders)
such (amod -> as)
as (prep -> stakeholders)
suppliers (pobj -> as)
, (punct -> suppliers)
clients (conj -> suppliers)
, (punct -> clients)
community (compound -> leaders)
leaders (conj -> clients)
or (cc -> leaders)
subject (amod -> experts)
matter (compound -> experts)
experts (conj -> leaders)
) (punct -> Coordinate)
in (prep -> Coordinate)
order (pobj -> in)
to (aux -> organise)
organise (acl -> order)
and (cc -> organise)
manage (conj -> organise)
operational (amod -> activities)
activities (dobj -> manage)
and (cc -> manage)
ensure (conj -> manage)
that (mark -> executed)
projects (nsubjpass -> executed)
, (punct -> projects)
programs (conj -> projects)
or (cc -> programs)
initiatives (conj -> programs)
are (auxpass -> executed)
executed (ccomp -> ensure)
efficiently (advmod -> executed)
, (punct -> executed)
effectively (advmod -> executed)
and (cc -> effectively)
respectfully (conj -> effectively)
. (punct -> Coordinate)
----------
Sentence: This may involve coordinating schedules, budgets, and resources, maintaining active listening and open lines of communication, addressing issues or concerns, and adhering to regulations, best practices, licensing requirements or organisational policies and procedures.
This (nsubj -> involve)
may (aux -> involve)
involve (ROOT -> involve)
coordinating (xcomp -> involve)
schedules (dobj -> coordinating)
, (punct -> schedules)
budgets (conj -> schedules)
, (punct -> budgets)
and (cc -> budgets)
resources (conj -> budgets)
, (punct -> coordinating)
maintaining (xcomp -> involve)
active (amod -> listening)
listening (amod -> lines)
and (cc -> listening)
open (conj -> listening)
lines (dobj -> maintaining)
of (prep -> lines)
communication (pobj -> of)
, (punct -> lines)
addressing (advcl -> maintaining)
issues (dobj -> addressing)
or (cc -> issues)
concerns (conj -> issues)
, (punct -> addressing)
and (cc -> addressing)
adhering (conj -> addressing)
to (prep -> adhering)
regulations (pobj -> to)
, (punct -> maintaining)
best (amod -> practices)
practices (conj -> maintaining)
, (punct -> practices)
licensing (compound -> requirements)
requirements (conj -> practices)
or (cc -> requirements)
organisational (amod -> policies)
policies (conj -> requirements)
and (cc -> policies)
procedures (conj -> policies)
. (punct -> involve)
----------
In [124]:
import spacy
from spacy import displacy
# Load the spaCy English language model
nlp = spacy.load("en_core_web_sm")
# Define the batch size and the total number of records to process
batch_size = 100
total_records = 500 # process only the first 5000 records
# Function to process text in batches and generate dependency trees
def process_text_in_batches(data, batch_size):
for start_idx in range(0, total_records, batch_size):
end_idx = start_idx + batch_size
# Ensure not to exceed the total number of records
if end_idx > total_records:
end_idx = total_records
batch = data[start_idx:end_idx]
# Process each document in the batch
for doc in nlp.pipe(batch, disable=["ner", "lemmatizer"]): # Disable unnecessary pipeline components
for sent in doc.sents:
# Display the dependency tree using displacy
svg = displacy.render(sent, style='dep', jupyter=True)
print(svg) # This will render the SVG in the notebook
# Slice the DataFrame to get only the required column and rows
data_slice = df['Skills Statement'].iloc[:total_records]
# Call the function with the sliced data
process_text_in_batches(data_slice, batch_size)
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
In [133]:
# Analyzing text descriptions to infer AI impact, assuming descriptions mention automation or AI
df['AI_Impact'] = df['Skills Statement'].apply(lambda x: 'high' if 'AI' in x.lower() or 'automation' in x.lower() else 'low')
# Visualize the distribution of AI impact on skills
ai_impact_counts = df['AI_Impact'].value_counts()
plt.figure(figsize=(8, 4))
ai_impact_counts.plot(kind='bar', color='orange')
plt.title('AI Impact on Skills')
plt.xlabel('Impact Level')
plt.ylabel('Number of Skills')
plt.xticks(rotation=0)
plt.show()
In [134]:
import spacy
nlp = spacy.load("en_core_web_sm")
def process_text(text):
doc = nlp(text)
lemmatized = " ".join([token.lemma_ for token in doc if not token.is_stop])
pos_tags = " ".join([token.pos_ for token in doc])
return lemmatized, pos_tags
# Apply text processing to extract lemmatized text and POS tags
df['lemmatized'], df['pos_tags'] = zip(*df['Skills Statement'].apply(process_text))
In [126]:
from sklearn.feature_extraction.text import CountVectorizer
import pandas as pd
def create_cooccurrence_matrix(text, vocab=None):
count_model = CountVectorizer(ngram_range=(1,1), vocabulary=vocab) # Default unigram model
X = count_model.fit_transform(text)
Xc = (X.T * X) # This is the co-occurrence matrix in sparse csr format
Xc.setdiag(0) # Set the diagonals to be zeroes as we don't want to count self co-occurrences
coocc_arr = Xc.toarray()
vocab = count_model.get_feature_names_out()
return pd.DataFrame(coocc_arr, index=vocab, columns=vocab)
# Create a co-occurrence matrix for the lemmatized text
coocc_matrix = create_cooccurrence_matrix(df['lemmatized'])
In [127]:
!pip install textblob
Requirement already satisfied: textblob in /usr/local/lib/python3.11/dist-packages (0.19.0) Requirement already satisfied: nltk>=3.9 in /usr/local/lib/python3.11/dist-packages (from textblob) (3.9.1) Requirement already satisfied: click in /usr/local/lib/python3.11/dist-packages (from nltk>=3.9->textblob) (8.1.8) Requirement already satisfied: joblib in /usr/local/lib/python3.11/dist-packages (from nltk>=3.9->textblob) (1.4.2) Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.11/dist-packages (from nltk>=3.9->textblob) (2024.11.6) Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (from nltk>=3.9->textblob) (4.67.1)
In [136]:
print(df[['ANZSCO Description', 'sentiment_polarity']])
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) <ipython-input-136-55d418bea75c> in <cell line: 0>() ----> 1 print(df[['ANZSCO Description', 'sentiment_polarity']]) /usr/local/lib/python3.11/dist-packages/pandas/core/frame.py in __getitem__(self, key) 4106 if is_iterator(key): 4107 key = list(key) -> 4108 indexer = self.columns._get_indexer_strict(key, "columns")[1] 4109 4110 # take() does not accept boolean indexers /usr/local/lib/python3.11/dist-packages/pandas/core/indexes/base.py in _get_indexer_strict(self, key, axis_name) 6198 keyarr, indexer, new_indexer = self._reindex_non_unique(keyarr) 6199 -> 6200 self._raise_if_missing(keyarr, indexer, axis_name) 6201 6202 keyarr = self.take(indexer) /usr/local/lib/python3.11/dist-packages/pandas/core/indexes/base.py in _raise_if_missing(self, key, indexer, axis_name) 6250 6251 not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique()) -> 6252 raise KeyError(f"{not_found} not in index") 6253 6254 @overload KeyError: "['sentiment_polarity'] not in index"
In [138]:
from textblob import TextBlob
# Function to calculate sentiment polarity
def sentiment_analysis(text):
return TextBlob(text).sentiment.polarity
df['sentiment'] = df['lemmatized'].apply(sentiment_analysis)
In [139]:
import pandas as pd
from textblob import TextBlob
# # Function to calculate sentiment polarity
# def calculate_sentiment(text):
# return TextBlob(text).sentiment.polarity
# Apply sentiment analysis on the 'ANZSCO Description' column
df['sentiment_polarity'] = df['ANZSCO Description'].apply(calculate_sentiment)
In [140]:
import matplotlib.pyplot as plt
# Plotting the sentiment distribution
plt.figure(figsize=(10, 6))
plt.hist(df['sentiment_polarity'], bins=30, color='blue', edgecolor='black')
plt.title('Sentiment Distribution in ANZSCO Descriptions')
plt.xlabel('Sentiment Polarity')
plt.ylabel('Frequency')
plt.show()
In [141]:
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
from nltk import bigrams
from collections import Counter
import spacy
# Load and prepare your data
nlp = spacy.load('en_core_web_sm')
df['tokenized'] = df['ANZSCO Description'].apply(lambda x: [token.text.lower() for token in nlp(x) if not token.is_stop and not token.is_punct])
def plot_bigram_network(tokens, n=50):
# Create bigrams and count their frequencies
bi_grams = list(bigrams(tokens))
bigram_freq = Counter(bi_grams)
# Create a graph
G = nx.Graph()
for bigram, count in bigram_freq.most_common(n):
G.add_edge(bigram[0], bigram[1], weight=count)
# Define node positions using the Kamada-Kawai layout algorithm for better visualization
pos = nx.kamada_kawai_layout(G, weight=None) # `weight=None` ignores the weights for layout calculation
# Draw the graph
plt.figure(figsize=(16, 12))
nx.draw_networkx_nodes(G, pos, node_size=100, node_color='skyblue', alpha=0.7)
edge_widths = [d['weight'] for (u, v, d) in G.edges(data=True)]
nx.draw_networkx_edges(G, pos, width=4*np.array(edge_widths)/max(edge_widths), alpha=0.5, edge_color='gray')
nx.draw_networkx_labels(G, pos, font_size=8, font_family='sans-serif')
plt.title('Bigram Network Graph')
plt.axis('off')
plt.show()
# Concatenate tokens from all descriptions into a single list
all_tokens = [token for sublist in df['tokenized'] for token in sublist]
plot_bigram_network(all_tokens)
In [142]:
from nltk import trigrams
def plot_trigram_network(tokens, n=50):
# Create trigrams and count their frequencies
tri_grams = list(trigrams(tokens))
trigram_freq = Counter(tri_grams)
# Create a graph
G = nx.Graph()
for trigram, count in trigram_freq.most_common(n):
# Add edges between each pair of words in the trigram
G.add_edge(trigram[0], trigram[1], weight=count)
G.add_edge(trigram[1], trigram[2], weight=count)
# Optionally add a hyperedge-like component if you want to visualize all three connections together
G.add_edge(trigram[0], trigram[2], weight=count)
# Define node positions using the Kamada-Kawai layout algorithm for better visualization
pos = nx.kamada_kawai_layout(G, weight=None) # `weight=None` ignores the weights for layout calculation
# Draw the graph
plt.figure(figsize=(16, 12))
nx.draw_networkx_nodes(G, pos, node_size=100, node_color='lightgreen', alpha=0.7)
edge_widths = [d['weight'] for (u, v, d) in G.edges(data=True)]
nx.draw_networkx_edges(G, pos, width=4*np.array(edge_widths)/max(edge_widths), alpha=0.5, edge_color='darkgreen')
nx.draw_networkx_labels(G, pos, font_size=8, font_family='sans-serif')
plt.title('Trigram Network Graph')
plt.axis('off')
plt.show()
# Concatenate tokens from all descriptions into a single list
plot_trigram_network(all_tokens)
In [143]:
import pandas as pd
import spacy
from spacy import displacy # Corrected import to 'displacy'
# Select the first 1000 rows
df_subset = df.iloc[:1000]
nlp = spacy.load('en_core_web_sm')
# Processing the first description for dependency parsing
doc = nlp(df_subset['ANZSCO Description'].iloc[0])
# Visualize the dependency tree
displacy.render(doc, style='dep', jupyter=True, options={'distance': 100}) # Using 'displacy' with lowercase 'c'
# Define a function to visualize dependency trees for multiple descriptions
def visualize_dependency_trees(df, column_name, num_trees=5):
for text in df[column_name].head(num_trees):
doc = nlp(text)
displacy.render(doc, style='dep', options={'distance': 100}, jupyter=True) # Using 'displacy' with lowercase 'c'
# Apply the function to visualize the first 5 descriptions' dependency trees
visualize_dependency_trees(df_subset, 'ANZSCO Description')
In [144]:
import pandas as pd
import spacy
# Load spaCy model
nlp = spacy.load('en_core_web_sm')
# Define a function to process a chunk
def process_chunk(chunk):
# Apply the NLP pipeline to each text entry
chunk['processed'] = chunk['ANZSCO Description'].apply(lambda x: [(ent.text, ent.label_) for ent in nlp(x).ents])
return chunk
# Initialize an empty DataFrame to store results
processed_data = pd.DataFrame()
# Read the dataset in chunks
chunk_size = 1000 # Define your chunk size based on your memory capacity
for chunk in pd.read_csv(url, chunksize=chunk_size):
processed_chunk = process_chunk(chunk)
processed_data = pd.concat([processed_data, processed_chunk])
In [145]:
import plotly.graph_objects as go
import networkx as nx
from nltk import bigrams
from collections import Counter
import spacy
import pandas as pd
# Load your data and process it
nlp = spacy.load('en_core_web_sm')
df['tokenized'] = df['ANZSCO Description'].apply(lambda x: [token.text.lower() for token in nlp(x) if not token.is_stop and not token.is_punct])
def plotly_interactive_bigram_network(tokens, n=50):
bi_grams = list(bigrams(tokens))
bigram_freq = Counter(bi_grams)
# Create network graph
G = nx.Graph()
for bigram, count in bigram_freq.most_common(n):
G.add_edge(bigram[0], bigram[1], weight=count)
# Position nodes using the Fruchterman-Reingold force-directed algorithm
pos = nx.spring_layout(G, k=0.5, iterations=100) # You can adjust k for more space
edge_x = []
edge_y = []
for edge in G.edges():
x0, y0 = pos[edge[0]]
x1, y1 = pos[edge[1]]
edge_x.extend([x0, x1, None])
edge_y.extend([y0, y1, None])
edge_trace = go.Scatter(
x=edge_x, y=edge_y,
line=dict(width=0.5, color='blue'),
hoverinfo='none',
mode='lines')
node_x = [pos[node][0] for node in G.nodes()]
node_y = [pos[node][1] for node in G.nodes()]
node_trace = go.Scatter(
x=node_x, y=node_y,
mode='markers+text',
hoverinfo='text',
marker=dict(showscale=True,
colorscale='Viridis',
size=10,
color=list(dict(G.degree()).values()),
colorbar=dict(thickness=15, title='Node Connections', xanchor='left', titleside='right'),
line_width=2))
node_text = [f'{node} ({G.degree(node)})' for node in G.nodes()]
node_trace.text = node_text
node_trace.marker.color = [len(G.edges(node)) for node in G.nodes()]
fig = go.Figure(data=[edge_trace, node_trace],
layout=go.Layout(
showlegend=False,
hovermode='closest',
margin=dict(b=0,l=0,r=0,t=0),
xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
title_text="Interactive Bigram Network Graph",
title_font_size=16))
fig.update_layout(template="plotly_dark")
fig.show()
# Concatenate tokens from all descriptions into a single list
all_tokens = [token for sublist in df['tokenized'] for token in sublist]
plotly_interactive_bigram_network(all_tokens)
In [146]:
from sklearn.metrics import silhouette_samples, silhouette_score
import matplotlib.cm as cm
from sklearn.cluster import KMeans
def silhouette_plot(X, range_n_clusters=[2, 3, 4, 5]):
for n_clusters in range_n_clusters:
fig, ax1 = plt.subplots(1, 1)
fig.set_size_inches(18, 7)
kmeans = KMeans(n_clusters=n_clusters, random_state=10)
cluster_labels = kmeans.fit_predict(X)
silhouette_avg = silhouette_score(X, cluster_labels)
print(f"For n_clusters = {n_clusters}, the average silhouette_score is : {silhouette_avg}")
sample_silhouette_values = silhouette_samples(X, cluster_labels)
y_lower = 10
for i in range(n_clusters):
ith_cluster_silhouette_values = sample_silhouette_values[cluster_labels == i]
ith_cluster_silhouette_values.sort()
size_cluster_i = ith_cluster_silhouette_values.shape[0]
y_upper = y_lower + size_cluster_i
color = cm.nipy_spectral(float(i) / n_clusters)
ax1.fill_betweenx(np.arange(y_lower, y_upper), 0, ith_cluster_silhouette_values, facecolor=color, edgecolor=color, alpha=0.7)
y_lower = y_upper + 10
ax1.set_title("The silhouette plot for the various clusters.")
ax1.set_xlabel("The silhouette coefficient values")
ax1.set_ylabel("Cluster label")
ax1.axvline(x=silhouette_avg, color="red", linestyle="--")
ax1.set_yticks([])
ax1.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])
plt.suptitle(f"Silhouette analysis for KMeans clustering on sample data with n_clusters = {n_clusters}", fontsize=14, fontweight='bold')
silhouette_plot(tfidf_matrix.toarray())
For n_clusters = 2, the average silhouette_score is : 0.016907956451177597 For n_clusters = 3, the average silhouette_score is : 0.01951168105006218 For n_clusters = 4, the average silhouette_score is : 0.02120300754904747 For n_clusters = 5, the average silhouette_score is : 0.021110709756612778
In [147]:
# Perform K-means clustering on the TF-IDF matrix
def perform_kmeans(X, n_clusters=3):
kmeans = KMeans(n_clusters=n_clusters, random_state=42)
kmeans.fit(X)
labels = kmeans.labels_
# Plotting the results
plt.figure(figsize=(8, 6))
plt.hist(labels, bins=range(n_clusters + 1), align='left', color='orange', rwidth=0.8)
plt.title('Distribution of Clusters')
plt.xlabel('Cluster')
plt.ylabel('Number of Descriptions')
plt.xticks(range(n_clusters))
plt.show()
return labels
kmeans_labels = perform_kmeans(tfidf_matrix.toarray(), n_clusters=3)
In [148]:
# Assuming clustering labels are from a previous model run (e.g., K-means)
from sklearn.metrics import silhouette_score
def evaluate_clustering(X, labels):
silhouette_avg = silhouette_score(X, labels)
print("Silhouette Score: {:.2f}".format(silhouette_avg))
evaluate_clustering(tfidf_matrix.toarray(), kmeans_labels)
Silhouette Score: 0.02
In [149]:
# Enhanced Visualization of N-grams and Clustering Results
import seaborn as sns
from nltk import bigrams # Import bigrams
from collections import Counter # Import Counter
def visualize_ngrams(ngrams):
ngram_df = pd.DataFrame(ngrams.most_common(20), columns=['bigram', 'count'])
# Convert the 'bigram' column to strings for plotting
ngram_df['bigram'] = ngram_df['bigram'].astype(str) # This line fixes the error
plt.figure(figsize=(12, 8))
sns.barplot(x='count', y='bigram', data=ngram_df, palette='viridis')
plt.title('Top 20 Bigrams')
plt.xlabel('Frequency')
plt.ylabel('Bigrams')
plt.show()
# Assuming 'df' is your DataFrame and 'ANZSCO Description' is the column with text
df['tokenized'] = df['ANZSCO Description'].apply(lambda x: [token.text.lower() for token in nlp(x) if not token.is_stop and not token.is_punct])
all_tokens = [token for sublist in df['tokenized'] for token in sublist] # Re-create all_tokens here
bi_grams = list(bigrams(all_tokens)) # Re-create bi_grams here using all_tokens
bigram_freq = Counter(bi_grams) # Now bi_grams is defined and can be used
visualize_ngrams(bigram_freq)
<ipython-input-149-fbf8cdc05173>:11: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.
In [150]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from textblob import TextBlob
# Function to calculate sentiment polarity (if not already defined)
def calculate_sentiment(text):
return TextBlob(text).sentiment.polarity
# Apply sentiment analysis if 'sentiment_polarity' column doesn't exist
if 'sentiment_polarity' not in df.columns:
df['sentiment_polarity'] = df['ANZSCO Description'].apply(calculate_sentiment)
# Plotting the sentiment distribution and getting histogram data
counts, bin_edges = np.histogram(df['sentiment_polarity'], bins=30) # Get histogram data
plt.figure(figsize=(10, 6))
plt.hist(df['sentiment_polarity'], bins=30, color='blue', edgecolor='black')
plt.title('Sentiment Distribution in ANZSCO Descriptions')
plt.xlabel('Sentiment Polarity')
plt.ylabel('Frequency')
plt.show()
# Create a DataFrame for the histogram data
hist_data = pd.DataFrame({
'Sentiment Polarity Range': pd.Series(bin_edges[:-1]).apply(lambda x: f"{x:.2f}") + " to " + pd.Series(bin_edges[1:]).apply(lambda x: f"{x:.2f}"),
'Frequency': counts
})
# Display the table
display(hist_data)
| Sentiment Polarity Range | Frequency | |
|---|---|---|
| 0 | -0.50 to -0.46 | 14 |
| 1 | -0.46 to -0.42 | 0 |
| 2 | -0.42 to -0.38 | 23 |
| 3 | -0.38 to -0.34 | 0 |
| 4 | -0.34 to -0.30 | 0 |
| 5 | -0.30 to -0.26 | 141 |
| 6 | -0.26 to -0.22 | 173 |
| 7 | -0.22 to -0.18 | 169 |
| 8 | -0.18 to -0.14 | 60 |
| 9 | -0.14 to -0.10 | 1761 |
| 10 | -0.10 to -0.06 | 890 |
| 11 | -0.06 to -0.02 | 671 |
| 12 | -0.02 to 0.02 | 13976 |
| 13 | 0.02 to 0.06 | 1174 |
| 14 | 0.06 to 0.10 | 909 |
| 15 | 0.10 to 0.14 | 1098 |
| 16 | 0.14 to 0.18 | 424 |
| 17 | 0.18 to 0.22 | 550 |
| 18 | 0.22 to 0.26 | 443 |
| 19 | 0.26 to 0.30 | 166 |
| 20 | 0.30 to 0.34 | 112 |
| 21 | 0.34 to 0.38 | 210 |
| 22 | 0.38 to 0.42 | 323 |
| 23 | 0.42 to 0.46 | 17 |
| 24 | 0.46 to 0.50 | 33 |
| 25 | 0.50 to 0.54 | 425 |
| 26 | 0.54 to 0.58 | 22 |
| 27 | 0.58 to 0.62 | 53 |
| 28 | 0.62 to 0.66 | 0 |
| 29 | 0.66 to 0.70 | 45 |
In [151]:
# Load the spaCy medium-sized model for better word embeddings
import spacy
import numpy as np # Import spaCy
from sklearn.decomposition import PCA # Import PCA
from sklearn.cluster import KMeans # Import KMeans
import matplotlib.pyplot as plt # Import matplotlib.pyplot
import seaborn as sns # Import seaborn and alias it as sns
try:
nlp = spacy.load("en_core_web_md")
except OSError:
print("spaCy medium model not found. Installing now...")
!python -m spacy download en_core_web_md
nlp = spacy.load("en_core_web_md")
# Selecting the relevant columns
new_df = df[['ANZSCO Title', 'Specialist Task', 'Cluster Family', 'Core Competency']].dropna()
# Sample a subset of data for performance optimization
new_df_sample = new_df.sample(frac=0.05, random_state=42)
# Combine all text from the selected columns into a single string
text_data = " ".join(new_df_sample.astype(str).apply(lambda x: ' '.join(x), axis=1))
# Process text with spaCy NLP pipeline
nlp = spacy.load("en_core_web_md") # Load model
doc = nlp(text_data)
# Extract named entities ensuring they have vector representations
entities = [(ent.text, ent.label_) for ent in doc.ents if ent.has_vector]
# Convert entity text into vectors using spaCy's word embeddings
entity_texts = [ent[0] for ent in entities]
entity_vectors = np.array([nlp(ent[0]).vector for ent in entities if nlp(ent[0]).has_vector])
# Reduce dimensions using PCA for better visualization
pca = PCA(n_components=2)
entity_vectors_pca = pca.fit_transform(entity_vectors)
# Apply K-Means clustering
num_clusters = 10 # Set number of clusters
kmeans = KMeans(n_clusters=num_clusters, random_state=42, n_init=10) # KMeans is now imported
clusters = kmeans.fit_predict(entity_vectors_pca)
# Create DataFrame for visualization
cluster_df = pd.DataFrame({
"Entity": entity_texts[:len(entity_vectors_pca)],
"X": entity_vectors_pca[:, 0],
"Y": entity_vectors_pca[:, 1],
"Cluster": clusters
})
# Plot Clustering Results
plt.figure(figsize=(12, 6))
sns.scatterplot(data=cluster_df, x="X", y="Y", hue="Cluster", palette="Set2", s=100, alpha=0.8)
plt.xlabel("PCA Component 1")
plt.ylabel("PCA Component 2")
plt.title("Named Entity Clustering using K-Means")
plt.legend(title="Cluster")
plt.grid(True)
plt.show()
# Display top entities in each cluster
for cluster in range(num_clusters):
cluster_entities = cluster_df[cluster_df["Cluster"] == cluster]["Entity"].unique()[:10] # Show top 10 per cluster
print(f"\nCluster {cluster}:")
print(", ".join(cluster_entities))
spaCy medium model not found. Installing now...
Collecting en-core-web-md==3.7.1
Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.7.1/en_core_web_md-3.7.1-py3-none-any.whl (42.8 MB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 42.8/42.8 MB 30.4 MB/s eta 0:00:00
Requirement already satisfied: spacy<3.8.0,>=3.7.2 in /usr/local/lib/python3.11/dist-packages (from en-core-web-md==3.7.1) (3.7.5)
Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /usr/local/lib/python3.11/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (3.0.12)
Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (1.0.5)
Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.11/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (1.0.12)
Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.11/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2.0.11)
Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.11/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (3.0.9)
Requirement already satisfied: thinc<8.3.0,>=8.2.2 in /usr/local/lib/python3.11/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (8.2.5)
Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /usr/local/lib/python3.11/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (1.1.3)
Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /usr/local/lib/python3.11/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2.5.1)
Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /usr/local/lib/python3.11/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2.0.10)
Requirement already satisfied: weasel<0.5.0,>=0.1.0 in /usr/local/lib/python3.11/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (0.4.1)
Requirement already satisfied: typer<1.0.0,>=0.3.0 in /usr/local/lib/python3.11/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (0.15.2)
Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.11/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (4.67.1)
Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2.32.3)
Requirement already satisfied: pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4 in /usr/local/lib/python3.11/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2.10.6)
Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (3.1.6)
Requirement already satisfied: setuptools in /usr/local/lib/python3.11/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (75.1.0)
Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (24.2)
Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /usr/local/lib/python3.11/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (3.5.0)
Requirement already satisfied: numpy>=1.19.0 in /usr/local/lib/python3.11/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (1.26.4)
Requirement already satisfied: language-data>=1.2 in /usr/local/lib/python3.11/dist-packages (from langcodes<4.0.0,>=3.2.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (1.3.0)
Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (0.7.0)
Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2.27.2)
Requirement already satisfied: typing-extensions>=4.12.2 in /usr/local/lib/python3.11/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (4.12.2)
Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (3.4.1)
Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (3.10)
Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2.3.0)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2025.1.31)
Requirement already satisfied: blis<0.8.0,>=0.7.8 in /usr/local/lib/python3.11/dist-packages (from thinc<8.3.0,>=8.2.2->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (0.7.11)
Requirement already satisfied: confection<1.0.0,>=0.0.1 in /usr/local/lib/python3.11/dist-packages (from thinc<8.3.0,>=8.2.2->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (0.1.5)
Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.11/dist-packages (from typer<1.0.0,>=0.3.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (8.1.8)
Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.11/dist-packages (from typer<1.0.0,>=0.3.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (1.5.4)
Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.11/dist-packages (from typer<1.0.0,>=0.3.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (13.9.4)
Requirement already satisfied: cloudpathlib<1.0.0,>=0.7.0 in /usr/local/lib/python3.11/dist-packages (from weasel<0.5.0,>=0.1.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (0.21.0)
Requirement already satisfied: smart-open<8.0.0,>=5.2.1 in /usr/local/lib/python3.11/dist-packages (from weasel<0.5.0,>=0.1.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (7.1.0)
Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (3.0.2)
Requirement already satisfied: marisa-trie>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from language-data>=1.2->langcodes<4.0.0,>=3.2.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (1.2.1)
Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (3.0.0)
Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (2.19.1)
Requirement already satisfied: wrapt in /usr/local/lib/python3.11/dist-packages (from smart-open<8.0.0,>=5.2.1->weasel<0.5.0,>=0.1.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (1.17.2)
Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy<3.8.0,>=3.7.2->en-core-web-md==3.7.1) (0.1.2)
Installing collected packages: en-core-web-md
Successfully installed en-core-web-md-3.7.1
✔ Download and installation successful
You can now load the package via spacy.load('en_core_web_md')
⚠ Restart to reload dependencies
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.
Cluster 0: Digital Cluster 1: Quality, Recruit, Communication, Communicate, Advise, Program, Control Cluster 2: Health Cluster 3: Calculate, Record, Signal, Harvest, Drive, File, Fabricate, Follow, Track, Configure Cluster 4: Data, Science and Mathematics Digital, Direct, Art, Document ICT, Construction Digital, Develop ICT, Design Cluster 5: Administer non-intravenous, Administer, Evaluate, Detain, Construct, Coordinate, Operate, Vehicle, Weigh, Calibrate Cluster 6: Safety, Assess, Act, Recreation, Agriculture, Enforce, Plan, Care for animals Agriculture Cluster 7: Records Cluster 8: Review, Mark, Analyse, Customer, Distribute, Fashion, Material, Package, Connect, Collect Cluster 9: Science, Research, Develop, Security, Food, Environmental
In [152]:
!pip install gensim
Collecting gensim
Downloading gensim-4.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.1 kB)
Requirement already satisfied: numpy<2.0,>=1.18.5 in /usr/local/lib/python3.11/dist-packages (from gensim) (1.26.4)
Collecting scipy<1.14.0,>=1.7.0 (from gensim)
Downloading scipy-1.13.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 60.6/60.6 kB 2.2 MB/s eta 0:00:00
Requirement already satisfied: smart-open>=1.8.1 in /usr/local/lib/python3.11/dist-packages (from gensim) (7.1.0)
Requirement already satisfied: wrapt in /usr/local/lib/python3.11/dist-packages (from smart-open>=1.8.1->gensim) (1.17.2)
Downloading gensim-4.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (26.7 MB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 26.7/26.7 MB 26.6 MB/s eta 0:00:00
Downloading scipy-1.13.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (38.6 MB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 38.6/38.6 MB 36.3 MB/s eta 0:00:00
Installing collected packages: scipy, gensim
Attempting uninstall: scipy
Found existing installation: scipy 1.14.1
Uninstalling scipy-1.14.1:
Successfully uninstalled scipy-1.14.1
Successfully installed gensim-4.3.3 scipy-1.13.1
In [153]:
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import RegexpTokenizer
from gensim.corpora import Dictionary
from gensim.models.ldamodel import LdaModel
from gensim.models.coherencemodel import CoherenceModel
# Load data
# Assuming the data is in a CSV file and loaded into DataFrame `df`
# df = pd.read_csv('your_data.csv')
# Download NLTK resources
nltk.download('stopwords')
# Tokenization and removing punctuation
tokenizer = RegexpTokenizer(r'\w+')
# Stop words
stop_words = set(stopwords.words('english'))
# Function to preprocess text
def preprocess(text):
tokens = tokenizer.tokenize(text.lower())
filtered_tokens = [w for w in tokens if not w in stop_words]
return filtered_tokens
# Apply preprocessing to the 'Skills Statement' column
df['processed'] = df['Skills Statement'].map(preprocess)
[nltk_data] Downloading package stopwords to /root/nltk_data... [nltk_data] Package stopwords is already up-to-date!
In [154]:
# Create a dictionary representation of the documents
dictionary = Dictionary(df['processed'])
# Filter out extremes to remove noise and words that are too frequent
dictionary.filter_extremes(no_below=20, no_above=0.5)
# Create a corpus
corpus = [dictionary.doc2bow(text) for text in df['processed']]
In [155]:
# Train LDA model
lda_model = LdaModel(corpus=corpus,
id2word=dictionary,
num_topics=10,
random_state=100,
update_every=1,
chunksize=100,
passes=10,
alpha='auto',
per_word_topics=True)
In [156]:
top_words_per_topic = []
for topic_idx, topic in enumerate(lda_model.get_topics()):
top_features_ind = topic.argsort()[-10:][::-1] # Change 10 to however many words you want
top_features = [dictionary[id] for id in top_features_ind]
weights = topic[top_features_ind]
top_words_per_topic.append((topic_idx, list(zip(top_features, weights))))
# Print the top words along with their weights for each topic
for topic, words in top_words_per_topic:
print(f"Topic {topic + 1}:")
for word, weight in words:
print(f" {word} {weight:.4f}")
print("\n")
Topic 1: information 0.0763 relevant 0.0404 records 0.0391 according 0.0367 ensuring 0.0270 requirements 0.0229 ensure 0.0218 details 0.0206 accurate 0.0204 policies 0.0173 Topic 2: patient 0.0401 relevant 0.0373 issues 0.0305 data 0.0284 information 0.0254 service 0.0173 identify 0.0172 order 0.0147 involve 0.0139 analysis 0.0136 Topic 3: example 0.0491 environments 0.0363 social 0.0301 practices 0.0255 action 0.0169 wellbeing 0.0160 principles 0.0141 people 0.0137 information 0.0129 apply 0.0123 Topic 4: record 0.0468 include 0.0363 levels 0.0319 stock 0.0233 keeping 0.0228 including 0.0205 conducting 0.0199 maintain 0.0184 accordance 0.0182 monitoring 0.0166 Topic 5: include 0.0358 providing 0.0346 technical 0.0328 activities 0.0261 required 0.0225 information 0.0198 project 0.0193 processes 0.0188 operational 0.0183 specialist 0.0181 Topic 6: needs 0.0297 learning 0.0286 skills 0.0237 students 0.0197 increase 0.0196 development 0.0196 support 0.0181 student 0.0169 individuals 0.0163 training 0.0162 Topic 7: provide 0.0273 services 0.0245 individuals 0.0236 safely 0.0234 customers 0.0186 policies 0.0166 organisational 0.0165 resources 0.0156 support 0.0152 well 0.0147 Topic 8: activities 0.0523 staff 0.0492 area 0.0351 research 0.0330 conduct 0.0281 knowledge 0.0281 reviewing 0.0243 review 0.0226 provide 0.0200 assist 0.0199 Topic 9: medical 0.0237 care 0.0210 design 0.0173 plans 0.0161 treatment 0.0150 client 0.0146 needs 0.0140 involve 0.0136 patients 0.0136 relevant 0.0131 Topic 10: equipment 0.0332 safety 0.0227 work 0.0199 ensure 0.0189 materials 0.0169 order 0.0155 requirements 0.0146 standards 0.0134 procedures 0.0128 tools 0.0123
In [157]:
# Assuming lda_model, corpus, dictionary, and processed text are already defined
# Compute Coherence Score using c_v method
coherence_model_lda = CoherenceModel(model=lda_model, texts=df['processed'], dictionary=dictionary, coherence='c_v')
coherence_lda = coherence_model_lda.get_coherence()
print('\nCoherence Score: ', coherence_lda)
Coherence Score: 0.43381428611824047